Commit 9cacc587215ba16ccaea4e7ff865ad40a36596ac
1 parent
f1e52ff4
- dodanie opcji aggl i praet
- dodanie obsługi CLI w morfeusz_analyzer
- ogólne ogarnięcie generatora

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@115 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 21 changed files with 2690 additions and 275 deletions
CMakeLists.txt
@@ -5,7 +5,7 @@ project (Morfeusz) | @@ -5,7 +5,7 @@ project (Morfeusz) | ||
5 | set (Morfeusz_VERSION_MAJOR 2) | 5 | set (Morfeusz_VERSION_MAJOR 2) |
6 | set (Morfeusz_VERSION_MINOR 0) | 6 | set (Morfeusz_VERSION_MINOR 0) |
7 | set (Morfeusz_VERSION_PATCH 0) | 7 | set (Morfeusz_VERSION_PATCH 0) |
8 | -set (CMAKE_BUILD_TYPE "Release") | 8 | +set (CMAKE_BUILD_TYPE "Debug") |
9 | 9 | ||
10 | enable_testing() | 10 | enable_testing() |
11 | 11 | ||
@@ -36,7 +36,7 @@ if ("${INPUT_DICTIONARIES}" STREQUAL "") | @@ -36,7 +36,7 @@ if ("${INPUT_DICTIONARIES}" STREQUAL "") | ||
36 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") | 36 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") |
37 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) | 37 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) |
38 | else () | 38 | else () |
39 | - set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorf-0.6.7.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") | 39 | + set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorfSmall.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") |
40 | endif () | 40 | endif () |
41 | endif () | 41 | endif () |
42 | 42 | ||
@@ -109,7 +109,7 @@ set (CPACK_OUTPUT_FILE_PREFIX "${TARGET_DIR}") | @@ -109,7 +109,7 @@ set (CPACK_OUTPUT_FILE_PREFIX "${TARGET_DIR}") | ||
109 | if (${CMAKE_SYSTEM_NAME} MATCHES "Linux") | 109 | if (${CMAKE_SYSTEM_NAME} MATCHES "Linux") |
110 | set (CPACK_GENERATOR "DEB" "TGZ") | 110 | set (CPACK_GENERATOR "DEB" "TGZ") |
111 | #debian | 111 | #debian |
112 | - set (CPACK_DEBIAN_PACKAGE_NAME "morfeusz") | 112 | + set (CPACK_DEBIAN_PACKAGE_NAME "morfeusz2") |
113 | set (CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_CONTACT}") | 113 | set (CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_CONTACT}") |
114 | set (CPACK_DEBIAN_PACKAGE_DEPENDS "libstdc++6 (>= 4.6)") | 114 | set (CPACK_DEBIAN_PACKAGE_DEPENDS "libstdc++6 (>= 4.6)") |
115 | set (CPACK_DEBIAN_PACKAGE_ARCHITECTURE "${ARCHITECTURE}") | 115 | set (CPACK_DEBIAN_PACKAGE_ARCHITECTURE "${ARCHITECTURE}") |
fsabuilder/buildfsa.py
@@ -261,9 +261,11 @@ def main(opts): | @@ -261,9 +261,11 @@ def main(opts): | ||
261 | if __name__ == '__main__': | 261 | if __name__ == '__main__': |
262 | import os | 262 | import os |
263 | opts = _parseOptions() | 263 | opts = _parseOptions() |
264 | -# try: | ||
265 | - main(opts) | 264 | + try: |
265 | + main(opts) | ||
266 | # except Exception as ex: | 266 | # except Exception as ex: |
267 | -# raise ex | ||
268 | # print >> sys.stderr, unicode(ex).encode('utf8') | 267 | # print >> sys.stderr, unicode(ex).encode('utf8') |
268 | +# sys.exit(1) | ||
269 | + finally: | ||
270 | + pass | ||
269 | 271 |
fsabuilder/morfeuszbuilder/fsa/encode.py
@@ -26,7 +26,6 @@ class Encoder(object): | @@ -26,7 +26,6 @@ class Encoder(object): | ||
26 | 26 | ||
27 | def encodeData(self, data): | 27 | def encodeData(self, data): |
28 | raise NotImplementedError() | 28 | raise NotImplementedError() |
29 | -# return bytearray(u'|'.join(data).encode(self.encoding)) + bytearray([0]) | ||
30 | 29 | ||
31 | def decodeData(self, rawData): | 30 | def decodeData(self, rawData): |
32 | return NotImplementedError() | 31 | return NotImplementedError() |
@@ -134,16 +133,8 @@ class Encoder(object): | @@ -134,16 +133,8 @@ class Encoder(object): | ||
134 | 133 | ||
135 | for typenum, interpsList in segnum2Interps.iteritems(): | 134 | for typenum, interpsList in segnum2Interps.iteritems(): |
136 | res.extend(self._encodeInterps4Type(typenum, interpsList, withCasePattern, withPrefix)) | 135 | res.extend(self._encodeInterps4Type(typenum, interpsList, withCasePattern, withPrefix)) |
137 | - | ||
138 | - | ||
139 | -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()): | ||
140 | -# encodedInterpsList.extend(self._encodeTypeNum(interp.typenum)) | ||
141 | -# encodedInterpsList.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=withCasePattern, withPrefix=withPrefix)) | ||
142 | -# encodedInterpsList.extend(self._encodeTagNum(interp.tagnum)) | ||
143 | -# encodedInterpsList.extend(self._encodeNameNum(interp.namenum)) | ||
144 | del interpsList | 136 | del interpsList |
145 | -# res.extend(serializationUtils.htons(len(encodedInterpsList))) | ||
146 | -# res.extend(encodedInterpsList) | 137 | + |
147 | return res | 138 | return res |
148 | 139 | ||
149 | class MorphEncoder(Encoder): | 140 | class MorphEncoder(Encoder): |
@@ -156,19 +147,6 @@ class MorphEncoder(Encoder): | @@ -156,19 +147,6 @@ class MorphEncoder(Encoder): | ||
156 | 147 | ||
157 | def encodeData(self, interpsList): | 148 | def encodeData(self, interpsList): |
158 | return self._doEncodeData(interpsList, withCasePattern=True, withPrefix=False) | 149 | return self._doEncodeData(interpsList, withCasePattern=True, withPrefix=False) |
159 | -# res = bytearray() | ||
160 | -# firstByte = len(interpsList) | ||
161 | -# assert firstByte < 256 | ||
162 | -# assert firstByte > 0 | ||
163 | -# res.append(firstByte) | ||
164 | -# assert type(interpsList) == frozenset | ||
165 | -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()): | ||
166 | -# res.extend(self._encodeTypeNum(interp.typenum)) | ||
167 | -# res.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=True, withPrefix=False)) | ||
168 | -# res.extend(self._encodeTagNum(interp.tagnum)) | ||
169 | -# res.extend(self._encodeNameNum(interp.namenum)) | ||
170 | -# del interpsList | ||
171 | -# return res | ||
172 | 150 | ||
173 | class Encoder4Generator(Encoder): | 151 | class Encoder4Generator(Encoder): |
174 | 152 | ||
@@ -177,18 +155,3 @@ class Encoder4Generator(Encoder): | @@ -177,18 +155,3 @@ class Encoder4Generator(Encoder): | ||
177 | 155 | ||
178 | def encodeData(self, interpsList): | 156 | def encodeData(self, interpsList): |
179 | return self._doEncodeData(interpsList, withCasePattern=False, withPrefix=True) | 157 | return self._doEncodeData(interpsList, withCasePattern=False, withPrefix=True) |
180 | -# res = bytearray() | ||
181 | -# firstByte = len(interpsList) | ||
182 | -# assert firstByte < 256 | ||
183 | -# assert firstByte > 0 | ||
184 | -# res.append(firstByte) | ||
185 | -# assert type(interpsList) == frozenset | ||
186 | -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()): | ||
187 | -# res.extend(self._encodeTypeNum(interp.typenum)) | ||
188 | -# res.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=False, withPrefix=True)) | ||
189 | -# res.extend(self._encodeTagNum(interp.tagnum)) | ||
190 | -# res.extend(self._encodeNameNum(interp.namenum)) | ||
191 | -# return res | ||
192 | -# | ||
193 | -# def decodeData(self, data): | ||
194 | -# |
fsabuilder/morfeuszbuilder/segrules/rulesManager.py
@@ -11,6 +11,7 @@ class RulesManager(object): | @@ -11,6 +11,7 @@ class RulesManager(object): | ||
11 | def __init__(self, segtypes): | 11 | def __init__(self, segtypes): |
12 | self.options2DFA = {} | 12 | self.options2DFA = {} |
13 | self.segtypes = segtypes | 13 | self.segtypes = segtypes |
14 | + self.defaultOptions = None | ||
14 | 15 | ||
15 | def _options2Key(self, optionsMap): | 16 | def _options2Key(self, optionsMap): |
16 | return frozenset(optionsMap.items()) | 17 | return frozenset(optionsMap.items()) |
@@ -21,6 +22,9 @@ class RulesManager(object): | @@ -21,6 +22,9 @@ class RulesManager(object): | ||
21 | def getDFA(self, optionsMap): | 22 | def getDFA(self, optionsMap): |
22 | return self.options2DFA[self._options2Key(optionsMap)] | 23 | return self.options2DFA[self._options2Key(optionsMap)] |
23 | 24 | ||
25 | + def setDefaultOptions(self, key2Def): | ||
26 | + self.defaultOptions = key2Def | ||
27 | + | ||
24 | def addDFA(self, optionsMap, dfa): | 28 | def addDFA(self, optionsMap, dfa): |
25 | self.options2DFA[self._options2Key(optionsMap)] = dfa | 29 | self.options2DFA[self._options2Key(optionsMap)] = dfa |
26 | 30 | ||
@@ -40,13 +44,17 @@ class RulesManager(object): | @@ -40,13 +44,17 @@ class RulesManager(object): | ||
40 | optionsMap = self._key2Options(key) | 44 | optionsMap = self._key2Options(key) |
41 | res.extend(self._serializeOptionsMap(optionsMap)) | 45 | res.extend(self._serializeOptionsMap(optionsMap)) |
42 | res.extend(self._serializeDFA(dfa)) | 46 | res.extend(self._serializeDFA(dfa)) |
47 | + res.extend(self._serializeOptionsMap(self.defaultOptions)) | ||
43 | logging.info('segmentation rules size: %s bytes', len(res)) | 48 | logging.info('segmentation rules size: %s bytes', len(res)) |
44 | return res | 49 | return res |
45 | 50 | ||
46 | def _serializeOptionsMap(self, optionsMap): | 51 | def _serializeOptionsMap(self, optionsMap): |
47 | assert len(optionsMap) < 256 | 52 | assert len(optionsMap) < 256 |
48 | res = bytearray() | 53 | res = bytearray() |
54 | + res.append(2) | ||
55 | + res.extend(self._serializeString('aggl')) | ||
49 | res.extend(self._serializeString(optionsMap['aggl'])) | 56 | res.extend(self._serializeString(optionsMap['aggl'])) |
57 | + res.extend(self._serializeString('praet')) | ||
50 | res.extend(self._serializeString(optionsMap['praet'])) | 58 | res.extend(self._serializeString(optionsMap['praet'])) |
51 | return res | 59 | return res |
52 | 60 | ||
@@ -63,4 +71,4 @@ class RulesManager(object): | @@ -63,4 +71,4 @@ class RulesManager(object): | ||
63 | # res.append(len(string)) | 71 | # res.append(len(string)) |
64 | res.extend(string.encode('utf8')) | 72 | res.extend(string.encode('utf8')) |
65 | res.append(0) | 73 | res.append(0) |
66 | - return res | ||
67 | \ No newline at end of file | 74 | \ No newline at end of file |
75 | + return res |
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
@@ -46,14 +46,14 @@ class RulesParser(object): | @@ -46,14 +46,14 @@ class RulesParser(object): | ||
46 | def2Key[define] = key | 46 | def2Key[define] = key |
47 | 47 | ||
48 | firstNFA = None | 48 | firstNFA = None |
49 | - for defs in itertools.product(*key2Defs.values()): | 49 | + for idx, defs in enumerate(itertools.product(*key2Defs.values())): |
50 | key2Def = dict([(def2Key[define], define) for define in defs]) | 50 | key2Def = dict([(def2Key[define], define) for define in defs]) |
51 | # print key2Def | 51 | # print key2Def |
52 | nfa = rulesNFA.RulesNFA() | 52 | nfa = rulesNFA.RulesNFA() |
53 | if not firstNFA: | 53 | if not firstNFA: |
54 | firstNFA = nfa | 54 | firstNFA = nfa |
55 | section = 'combinations' if self.rulesType == RulesParser.PARSE4ANALYZER else 'generator combinations' | 55 | section = 'combinations' if self.rulesType == RulesParser.PARSE4ANALYZER else 'generator combinations' |
56 | - combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(section) | 56 | + combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(section, ignoreComments=False) |
57 | combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs, filename)) | 57 | combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs, filename)) |
58 | for rule in self._doParse(combinationEnumeratedLines, segtypesHelper, filename): | 58 | for rule in self._doParse(combinationEnumeratedLines, segtypesHelper, filename): |
59 | # print rule | 59 | # print rule |
@@ -64,6 +64,8 @@ class RulesParser(object): | @@ -64,6 +64,8 @@ class RulesParser(object): | ||
64 | # dfa.debug() | 64 | # dfa.debug() |
65 | # print dfa.tryToRecognize(bytearray([14])) | 65 | # print dfa.tryToRecognize(bytearray([14])) |
66 | res.addDFA(key2Def, dfa) | 66 | res.addDFA(key2Def, dfa) |
67 | + if idx == 0: | ||
68 | + res.setDefaultOptions(key2Def) | ||
67 | return res | 69 | return res |
68 | 70 | ||
69 | def _doParse(self, combinationEnumeratedLines, segtypesHelper, filename): | 71 | def _doParse(self, combinationEnumeratedLines, segtypesHelper, filename): |
fsabuilder/morfeuszbuilder/utils/configFile.py
@@ -46,8 +46,13 @@ class ConfigFile(object): | @@ -46,8 +46,13 @@ class ConfigFile(object): | ||
46 | else: | 46 | else: |
47 | return None | 47 | return None |
48 | 48 | ||
49 | - def enumerateLinesInSection(self, sectionName): | ||
50 | - return self.section2Lines[sectionName] | 49 | + def enumerateLinesInSection(self, sectionName, ignoreComments=True): |
50 | + if sectionName not in self.section2Lines: | ||
51 | + raise exceptions.ConfigFileException(self.filename, None, u'Missing section: "%s"' % sectionName) | ||
52 | + if not ignoreComments: | ||
53 | + return self.section2Lines[sectionName] | ||
54 | + else: | ||
55 | + return [(linenum, line) for (linenum, line) in self.section2Lines[sectionName] if not line.startswith('#')] | ||
51 | 56 | ||
52 | def _parse(self): | 57 | def _parse(self): |
53 | with codecs.open(self.filename, 'r', 'utf8') as f: | 58 | with codecs.open(self.filename, 'r', 'utf8') as f: |
fsabuilder/morfeuszbuilder/utils/exceptions.py
@@ -31,4 +31,7 @@ class ConfigFileException(FSABuilderException): | @@ -31,4 +31,7 @@ class ConfigFileException(FSABuilderException): | ||
31 | self.msg = msg | 31 | self.msg = msg |
32 | 32 | ||
33 | def __str__(self): | 33 | def __str__(self): |
34 | - return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg) | 34 | + if self.lineNum: |
35 | + return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg) | ||
36 | + else: | ||
37 | + return u'%s - %s' % (self.filename, self.msg) |
input/segmenty.dat
@@ -3,6 +3,7 @@ aggl=permissive strict isolated | @@ -3,6 +3,7 @@ aggl=permissive strict isolated | ||
3 | praet=split composite | 3 | praet=split composite |
4 | 4 | ||
5 | [combinations] | 5 | [combinations] |
6 | + | ||
6 | #define wsz_interp (interp|kropka|dywiz)* | 7 | #define wsz_interp (interp|kropka|dywiz)* |
7 | 8 | ||
8 | #define moze_interp(segmenty) wsz_interp segmenty wsz_interp | 9 | #define moze_interp(segmenty) wsz_interp segmenty wsz_interp |
@@ -19,7 +20,7 @@ samotny | @@ -19,7 +20,7 @@ samotny | ||
19 | # przeszlik pojedynczy w formie nieaglutynacyjnej, np. „gniótł”: | 20 | # przeszlik pojedynczy w formie nieaglutynacyjnej, np. „gniótł”: |
20 | moze_interp(praet_sg_na) | 21 | moze_interp(praet_sg_na) |
21 | 22 | ||
22 | -# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „moze”: | 23 | +# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „czytał”: |
23 | moze_interp(praet_sg) | 24 | moze_interp(praet_sg) |
24 | 25 | ||
25 | # przeszlik mnogi, np. „czytali”: | 26 | # przeszlik mnogi, np. „czytali”: |
@@ -69,7 +70,7 @@ moze_interp(praet_sg by aglsg) | @@ -69,7 +70,7 @@ moze_interp(praet_sg by aglsg) | ||
69 | # np. „gnietli·by·śmy” | 70 | # np. „gnietli·by·śmy” |
70 | moze_interp(praet_pl by aglpl) | 71 | moze_interp(praet_pl by aglpl) |
71 | #else | 72 | #else |
72 | -# moze_interp(praetcond) | 73 | +#moze_interp(praetcond) |
73 | #endif | 74 | #endif |
74 | # np. „by·ś” | 75 | # np. „by·ś” |
75 | moze_interp(by aglsg) | 76 | moze_interp(by aglsg) |
@@ -97,13 +98,13 @@ moze_interp( (adja dywiz)+ adj ) | @@ -97,13 +98,13 @@ moze_interp( (adja dywiz)+ adj ) | ||
97 | # adja dywiz adja dywiz adja dywiz adj interp? | 98 | # adja dywiz adja dywiz adja dywiz adj interp? |
98 | # adja dywiz adja dywiz adja dywiz adja dywiz adj interp? | 99 | # adja dywiz adja dywiz adja dywiz adja dywiz adj interp? |
99 | 100 | ||
100 | -# Formy zanegowane stopnia wyższego przymiotników i przysłówków (WK) | ||
101 | -# np. „nie·grzeczniejszy”, „nie·grzeczniej” | ||
102 | -moze_interp( nie> adj_com ) | 101 | +# Stopień najwyższy: |
102 | +# np. „naj·zieleńszy”, „naj·mądrzej” | ||
103 | +moze_interp( naj> adj_sup ) | ||
103 | 104 | ||
104 | # Formy „zanegowane” gerundiów i imiesłowów: | 105 | # Formy „zanegowane” gerundiów i imiesłowów: |
105 | # np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”: | 106 | # np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”: |
106 | -moze_interp( nie> negat ) | 107 | +moze_interp( nie > negat ) |
107 | 108 | ||
108 | # Przyimki akceptujące krótką formę „-ń” | 109 | # Przyimki akceptujące krótką formę „-ń” |
109 | moze_interp(z_on_agl) | 110 | moze_interp(z_on_agl) |
@@ -115,17 +116,11 @@ moze_interp( dig>* dig ) | @@ -115,17 +116,11 @@ moze_interp( dig>* dig ) | ||
115 | 116 | ||
116 | # Formacje prefiksalne | 117 | # Formacje prefiksalne |
117 | #### trzeba wydzielić odpowiednie samodze! | 118 | #### trzeba wydzielić odpowiednie samodze! |
118 | -# rzeczownikowe | ||
119 | -# np. „euro·sodoma”, „e-·papieros” | ||
120 | -moze_interp(nomina) | ||
121 | -moze_interp( prefs> nomina ) | 119 | +# rzeczownikowe i przymiotnikowe |
120 | +# np. „euro·sodoma”, „e-·papieros”, „euro·sodomski”, „bez·argumentowy” | ||
121 | +moze_interp( prefs> samodz ) | ||
122 | # czasownikowe np. „po·nakapywać” | 122 | # czasownikowe np. „po·nakapywać” |
123 | -moze_interp(verba_imperf) | ||
124 | -moze_interp( prefv> verba_imperf ) | ||
125 | -# przymiotnikowe np. „do·żylny”, „euro·sodomski”, „bez·argumentowy” | ||
126 | -moze_interp(adjectiva) | ||
127 | -moze_interp(prefa> adj) | ||
128 | -moze_interp( prefa> adjectiva ) | 123 | +moze_interp( prefv> samodz ) |
129 | 124 | ||
130 | # Apozycje z dywizem | 125 | # Apozycje z dywizem |
131 | # np. „kobieta-prezydent” | 126 | # np. „kobieta-prezydent” |
@@ -138,26 +133,12 @@ adj dywiz samodz | @@ -138,26 +133,12 @@ adj dywiz samodz | ||
138 | # ? | 133 | # ? |
139 | samodz dywiz adj | 134 | samodz dywiz adj |
140 | 135 | ||
141 | -#### PONIŻEJ REGUŁY WK | ||
142 | -# Stopień najwyższy: | ||
143 | -# np. „naj·zieleńszy”, „naj·mądrzej” | ||
144 | -moze_interp( naj> adj_sup ) | ||
145 | -moze_interp( nie> naj> adj_sup ) | ||
146 | -# Cząstka li przy osobowych formach czasownika oddzielona dywizem: znasz-li ten kraj | ||
147 | -moze_interp( praet_sg dywiz li) | ||
148 | -moze_interp( praet_pl dywiz li) | ||
149 | -moze_interp( praet_sg_na dywiz li) | ||
150 | -moze_interp( fin dywiz li) | ||
151 | - | ||
152 | -# i bez dywizu --- czy bez dywizu jest sens to łapać? | ||
153 | -#moze_interp( praet_sg li) | ||
154 | -#moze_interp( praet_pl li) | ||
155 | -#moze_interp( praet_sg_na li) | ||
156 | -#moze_interp( fin li) | ||
157 | - | ||
158 | [generator combinations] | 136 | [generator combinations] |
159 | -prefs> nomina | ||
160 | -nomina | 137 | +adj |
138 | +adj_sup | ||
139 | +samodz | ||
140 | +prefs> samodz | ||
141 | +prefv> samodz | ||
161 | 142 | ||
162 | [segment types] | 143 | [segment types] |
163 | naj | 144 | naj |
@@ -193,45 +174,16 @@ dywiz | @@ -193,45 +174,16 @@ dywiz | ||
193 | kropka | 174 | kropka |
194 | samodz | 175 | samodz |
195 | 176 | ||
196 | -[lexemes] | ||
197 | -z_aglt aby:comp | ||
198 | -z_aglt bowiem:comp | ||
199 | -by by:qub | ||
200 | -li li:qub | ||
201 | -z_aglt by:comp | ||
202 | -z_aglt cóż:subst | ||
203 | -z_aglt czemu:adv | ||
204 | -z_aglt czyżby:qub | ||
205 | -z_aglt choćby:comp | ||
206 | -z_aglt chociażby:comp | ||
207 | -z_aglt dlaczego:adv | ||
208 | -z_aglt dopóki:comp | ||
209 | -z_aglt dopóty:conj | ||
210 | -z_aglt gdyby:comp | ||
211 | -z_aglt gdzie:qub | ||
212 | -z_aglt gdzie:adv | ||
213 | -z_aglt jakby:comp | ||
214 | -z_aglt jakoby:comp | ||
215 | -z_aglt kiedy:adv | ||
216 | -z_aglt kiedy:comp | ||
217 | -z_aglt tylko:qub | ||
218 | -z_aglt żeby:comp | ||
219 | -dywiz -:interp | ||
220 | -kropka .:interp | ||
221 | - | ||
222 | [tags] | 177 | [tags] |
223 | naj naj | 178 | naj naj |
224 | nie nie | 179 | nie nie |
225 | prefs prefs | 180 | prefs prefs |
226 | prefv prefv | 181 | prefv prefv |
227 | -prefa prefa | ||
228 | dig dig | 182 | dig dig |
229 | adja adja | 183 | adja adja |
230 | adj adj:%:pos | 184 | adj adj:%:pos |
231 | adj_sup adj:%:sup | 185 | adj_sup adj:%:sup |
232 | adj_sup adv:sup | 186 | adj_sup adv:sup |
233 | -adj_com adj:%:com | ||
234 | -adj_com adj:%:com | ||
235 | negat ger:%:neg | 187 | negat ger:%:neg |
236 | negat pact:%:neg | 188 | negat pact:%:neg |
237 | negat ppas:%:neg | 189 | negat ppas:%:neg |
@@ -243,22 +195,39 @@ samotny interj | @@ -243,22 +195,39 @@ samotny interj | ||
243 | interp interp | 195 | interp interp |
244 | aglsg aglt:sg:% | 196 | aglsg aglt:sg:% |
245 | aglpl aglt:pl:% | 197 | aglpl aglt:pl:% |
198 | +#praetcond cond:% | ||
199 | +#praetcond praet:%:pri:% | ||
200 | +#praetcond praet:%:sec:% | ||
201 | +#praetcond praet:%:ter:% | ||
246 | praet_sg_agl praet:sg:%:agl | 202 | praet_sg_agl praet:sg:%:agl |
247 | praet_sg_na praet:sg:%:nagl | 203 | praet_sg_na praet:sg:%:nagl |
248 | praet_sg praet:sg:% | 204 | praet_sg praet:sg:% |
249 | praet_pl praet:pl:% | 205 | praet_pl praet:pl:% |
250 | praet_sg winien:sg:% | 206 | praet_sg winien:sg:% |
251 | praet_pl winien:pl:% | 207 | praet_pl winien:pl:% |
252 | -fin fin:% | ||
253 | -nomina subst:% | ||
254 | -nomina ger:% | ||
255 | -nomina depr:% | ||
256 | -adjectiva adv:% | ||
257 | -adjectiva ppas:% | ||
258 | -adjectiva pact:% | ||
259 | -verba_imperf praet:%:imperf | ||
260 | -verba_imperf fin:%:imperf | ||
261 | -verba_imperf inf:imperf | ||
262 | -verba_imperf imps:imperf | ||
263 | -verba_imperf impt:%:imperf | ||
264 | samodz % | 208 | samodz % |
209 | + | ||
210 | +[lexemes] | ||
211 | +z_aglt aby:comp | ||
212 | +z_aglt bowiem:comp | ||
213 | +by by:qub | ||
214 | +z_aglt by:comp | ||
215 | +z_aglt cóż:subst | ||
216 | +z_aglt czemu:adv | ||
217 | +z_aglt czyżby:qub | ||
218 | +z_aglt choćby:comp | ||
219 | +z_aglt chociażby:comp | ||
220 | +z_aglt dlaczego:adv | ||
221 | +z_aglt dopóki:comp | ||
222 | +z_aglt dopóty:conj | ||
223 | +z_aglt gdyby:comp | ||
224 | +z_aglt gdzie:qub | ||
225 | +z_aglt gdzie:adv | ||
226 | +z_aglt jakby:comp | ||
227 | +z_aglt jakoby:comp | ||
228 | +z_aglt kiedy:adv | ||
229 | +z_aglt kiedy:comp | ||
230 | +z_aglt tylko:qub | ||
231 | +z_aglt żeby:comp | ||
232 | +dywiz -:interp | ||
233 | +kropka .:interp |
morfeusz/CMakeLists.txt
1 | 1 | ||
2 | + | ||
2 | ########## generate default dictionary data ################# | 3 | ########## generate default dictionary data ################# |
3 | add_custom_command ( | 4 | add_custom_command ( |
4 | OUTPUT "${INPUT_DICTIONARY_CPP}" | 5 | OUTPUT "${INPUT_DICTIONARY_CPP}" |
@@ -57,7 +58,7 @@ set(INCLUDE_FILES | @@ -57,7 +58,7 @@ set(INCLUDE_FILES | ||
57 | 58 | ||
58 | add_library (libmorfeusz SHARED ${SRC_FILES}) | 59 | add_library (libmorfeusz SHARED ${SRC_FILES}) |
59 | set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) | 60 | set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) |
60 | -set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz") | 61 | +set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") |
61 | 62 | ||
62 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) | 63 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) |
63 | add_executable (morfeusz_generator morfeusz_generator.cpp) | 64 | add_executable (morfeusz_generator morfeusz_generator.cpp) |
morfeusz/Environment.cpp
@@ -21,13 +21,6 @@ static Deserializer<vector<InterpsGroup> >& initializeDeserializer(MorfeuszProce | @@ -21,13 +21,6 @@ static Deserializer<vector<InterpsGroup> >& initializeDeserializer(MorfeuszProce | ||
21 | return *(processorType == ANALYZER ? analyzerDeserializer : generatorDeserializer); | 21 | return *(processorType == ANALYZER ? analyzerDeserializer : generatorDeserializer); |
22 | } | 22 | } |
23 | 23 | ||
24 | -static SegrulesFSA* getDefaultSegrulesFSA(const map<SegrulesOptions, SegrulesFSA*>& map) { | ||
25 | - SegrulesOptions opts; | ||
26 | - opts["aggl"] = "isolated"; | ||
27 | - opts["praet"] = "split"; | ||
28 | - return (*(map.find(opts))).second; | ||
29 | -} | ||
30 | - | ||
31 | static void deleteSegrulesFSAs(std::map<SegrulesOptions, SegrulesFSA*>& fsasMap) { | 24 | static void deleteSegrulesFSAs(std::map<SegrulesOptions, SegrulesFSA*>& fsasMap) { |
32 | for ( | 25 | for ( |
33 | std::map<SegrulesOptions, SegrulesFSA*>::iterator it = fsasMap.begin(); | 26 | std::map<SegrulesOptions, SegrulesFSA*>::iterator it = fsasMap.begin(); |
@@ -43,23 +36,23 @@ Environment::Environment( | @@ -43,23 +36,23 @@ Environment::Environment( | ||
43 | MorfeuszProcessorType processorType, | 36 | MorfeuszProcessorType processorType, |
44 | const unsigned char* fsaFileStartPtr) | 37 | const unsigned char* fsaFileStartPtr) |
45 | : currentCharsetConverter(getCharsetConverter(charset)), | 38 | : currentCharsetConverter(getCharsetConverter(charset)), |
46 | - utf8CharsetConverter(), | ||
47 | - isoCharsetConverter(), | ||
48 | - cp1250CharsetConverter(), | ||
49 | - cp852CharsetConverter(), | ||
50 | - caseConverter(), | ||
51 | - tagset(fsaFileStartPtr), | ||
52 | - fsaFileStartPtr(fsaFileStartPtr), | ||
53 | - fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))), | ||
54 | - segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)), | ||
55 | - currSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap)), | ||
56 | - isFromFile(false), | ||
57 | - chunksDecoder( | ||
58 | - processorType == ANALYZER | ||
59 | - ? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this) | ||
60 | - : (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)), | ||
61 | - processorType(processorType) | ||
62 | - { | 39 | +utf8CharsetConverter(), |
40 | +isoCharsetConverter(), | ||
41 | +cp1250CharsetConverter(), | ||
42 | +cp852CharsetConverter(), | ||
43 | +caseConverter(), | ||
44 | +tagset(fsaFileStartPtr), | ||
45 | +fsaFileStartPtr(fsaFileStartPtr), | ||
46 | +fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))), | ||
47 | +segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)), | ||
48 | +currSegrulesOptions(getDefaultSegrulesOptions(fsaFileStartPtr)), | ||
49 | +currSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap, fsaFileStartPtr)), | ||
50 | +isFromFile(false), | ||
51 | +chunksDecoder( | ||
52 | +processorType == ANALYZER | ||
53 | +? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this) | ||
54 | +: (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)), | ||
55 | +processorType(processorType) { | ||
63 | } | 56 | } |
64 | 57 | ||
65 | const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const { | 58 | const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const { |
@@ -129,3 +122,16 @@ const FSAType& Environment::getFSA() const { | @@ -129,3 +122,16 @@ const FSAType& Environment::getFSA() const { | ||
129 | const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const { | 122 | const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const { |
130 | return *(this->chunksDecoder); | 123 | return *(this->chunksDecoder); |
131 | } | 124 | } |
125 | + | ||
126 | +void Environment::setSegrulesOption(const std::string& option, const std::string& value) { | ||
127 | + if (this->currSegrulesOptions.find(option) == this->currSegrulesOptions.end()) { | ||
128 | + throw MorfeuszException("Invalid segmentation option '"+option+"'"); | ||
129 | + } | ||
130 | + SegrulesOptions prevOptions = this->currSegrulesOptions; | ||
131 | + this->currSegrulesOptions[option] = value; | ||
132 | + if (this->segrulesFSAsMap.find(this->currSegrulesOptions) == this->segrulesFSAsMap.end()) { | ||
133 | + this->currSegrulesOptions = prevOptions; | ||
134 | + throw MorfeuszException("Invalid '"+option+"' option value: '"+value+"'"); | ||
135 | + } | ||
136 | + this->currSegrulesFSA = this->segrulesFSAsMap.find(this->currSegrulesOptions)->second; | ||
137 | +} |
morfeusz/Environment.hpp
@@ -41,6 +41,8 @@ public: | @@ -41,6 +41,8 @@ public: | ||
41 | 41 | ||
42 | void setFSAFile(const std::string& filename); | 42 | void setFSAFile(const std::string& filename); |
43 | 43 | ||
44 | + void setSegrulesOption(const std::string& option, const std::string& value); | ||
45 | + | ||
44 | const SegrulesFSA& getCurrentSegrulesFSA() const; | 46 | const SegrulesFSA& getCurrentSegrulesFSA() const; |
45 | 47 | ||
46 | const FSAType& getFSA() const; | 48 | const FSAType& getFSA() const; |
@@ -60,6 +62,7 @@ private: | @@ -60,6 +62,7 @@ private: | ||
60 | const unsigned char* fsaFileStartPtr; | 62 | const unsigned char* fsaFileStartPtr; |
61 | const FSAType* fsa; | 63 | const FSAType* fsa; |
62 | std::map<SegrulesOptions, SegrulesFSA*> segrulesFSAsMap; | 64 | std::map<SegrulesOptions, SegrulesFSA*> segrulesFSAsMap; |
65 | + SegrulesOptions currSegrulesOptions; | ||
63 | const SegrulesFSA* currSegrulesFSA; | 66 | const SegrulesFSA* currSegrulesFSA; |
64 | bool isFromFile; | 67 | bool isFromFile; |
65 | 68 |
morfeusz/InterpretedChunksDecoder.hpp
@@ -29,21 +29,40 @@ public: | @@ -29,21 +29,40 @@ public: | ||
29 | : env(env) { | 29 | : env(env) { |
30 | } | 30 | } |
31 | 31 | ||
32 | + virtual ~InterpretedChunksDecoder() { | ||
33 | + } | ||
34 | + | ||
32 | virtual void decode( | 35 | virtual void decode( |
33 | unsigned int startNode, | 36 | unsigned int startNode, |
34 | unsigned int endNode, | 37 | unsigned int endNode, |
35 | const InterpretedChunk& interpretedChunk, | 38 | const InterpretedChunk& interpretedChunk, |
36 | std::vector<MorphInterpretation>& out) const = 0; | 39 | std::vector<MorphInterpretation>& out) const = 0; |
37 | 40 | ||
38 | - virtual ~InterpretedChunksDecoder() { | ||
39 | - } | ||
40 | - | ||
41 | protected: | 41 | protected: |
42 | + | ||
43 | + virtual MorphInterpretation decodeMorphInterpretation( | ||
44 | + unsigned int startNode, unsigned int endNode, | ||
45 | + const string& orth, | ||
46 | + const string& lemmaPrefix, | ||
47 | + const InterpretedChunk& chunk, | ||
48 | + const unsigned char*& ptr) const = 0; | ||
42 | 49 | ||
43 | virtual void decodeForm( | 50 | virtual void decodeForm( |
44 | const std::vector<uint32_t>& orth, | 51 | const std::vector<uint32_t>& orth, |
45 | const EncodedForm& form, | 52 | const EncodedForm& form, |
46 | std::string& res) const = 0; | 53 | std::string& res) const = 0; |
54 | + | ||
55 | + EncodedInterpretation deserializeInterp(const unsigned char*& ptr) const { | ||
56 | + EncodedInterpretation interp; | ||
57 | + deserializeEncodedForm(ptr, interp.value); | ||
58 | + interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr))); | ||
59 | + ptr += 2; | ||
60 | + interp.nameClassifier = *ptr; | ||
61 | + ptr++; | ||
62 | + return interp; | ||
63 | + } | ||
64 | + | ||
65 | + virtual void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const = 0; | ||
47 | 66 | ||
48 | const Environment& env; | 67 | const Environment& env; |
49 | }; | 68 | }; |
@@ -53,7 +72,7 @@ public: | @@ -53,7 +72,7 @@ public: | ||
53 | 72 | ||
54 | InterpretedChunksDecoder4Analyzer(const Environment& env) : InterpretedChunksDecoder(env) { | 73 | InterpretedChunksDecoder4Analyzer(const Environment& env) : InterpretedChunksDecoder(env) { |
55 | } | 74 | } |
56 | - | 75 | + |
57 | void decode( | 76 | void decode( |
58 | unsigned int startNode, | 77 | unsigned int startNode, |
59 | unsigned int endNode, | 78 | unsigned int endNode, |
@@ -90,42 +109,12 @@ protected: | @@ -90,42 +109,12 @@ protected: | ||
90 | } | 109 | } |
91 | } | 110 | } |
92 | 111 | ||
93 | -private: | ||
94 | - | ||
95 | - void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& originalForm, std::string& decodedForm) const { | ||
96 | - for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | ||
97 | - const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | ||
98 | - originalForm += env.getCharsetConverter().toString(prefixChunk.originalCodepoints); | ||
99 | - const unsigned char* ptr = prefixChunk.interpsGroup.ptr; | ||
100 | - MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, originalForm, string(""), prefixChunk, ptr); | ||
101 | - decodedForm += mi.getLemma(); | ||
102 | - } | ||
103 | - } | ||
104 | - | ||
105 | - MorphInterpretation decodeMorphInterpretation( | ||
106 | - unsigned int startNode, unsigned int endNode, | ||
107 | - const string& orth, | ||
108 | - const string& lemmaPrefix, | ||
109 | - const InterpretedChunk& chunk, | ||
110 | - const unsigned char*& ptr) const { | ||
111 | - string lemma = lemmaPrefix; | ||
112 | - EncodedInterpretation ei = this->decodeInterp(ptr); | ||
113 | - this->decodeForm(chunk.lowercaseCodepoints, ei.value, lemma); | ||
114 | - return MorphInterpretation( | ||
115 | - startNode, endNode, | ||
116 | - orth, lemma, | ||
117 | - ei.tag, | ||
118 | - ei.nameClassifier, | ||
119 | - env.getTagset(), | ||
120 | - env.getCharsetConverter()); | ||
121 | - } | ||
122 | - | ||
123 | - void decodeLemma(const unsigned char*& ptr, EncodedForm& lemma) const { | ||
124 | - lemma.suffixToCut = *ptr; | 112 | + void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const { |
113 | + encodedForm.suffixToCut = *ptr; | ||
125 | ptr++; | 114 | ptr++; |
126 | - lemma.suffixToAdd = (const char*) ptr; | 115 | + encodedForm.suffixToAdd = (const char*) ptr; |
127 | ptr += strlen((const char*) ptr) + 1; | 116 | ptr += strlen((const char*) ptr) + 1; |
128 | - assert(lemma.casePattern.size() == 0); | 117 | + assert(encodedForm.casePattern.size() == 0); |
129 | // lemma.casePattern.resize(MAX_WORD_SIZE, false); | 118 | // lemma.casePattern.resize(MAX_WORD_SIZE, false); |
130 | uint8_t casePatternType = *ptr; | 119 | uint8_t casePatternType = *ptr; |
131 | ptr++; | 120 | ptr++; |
@@ -139,7 +128,7 @@ private: | @@ -139,7 +128,7 @@ private: | ||
139 | ptr++; | 128 | ptr++; |
140 | for (unsigned int i = 0; i < prefixLength; i++) { | 129 | for (unsigned int i = 0; i < prefixLength; i++) { |
141 | // lemma.casePattern[i] = true; | 130 | // lemma.casePattern[i] = true; |
142 | - lemma.casePattern.push_back(true); | 131 | + encodedForm.casePattern.push_back(true); |
143 | } | 132 | } |
144 | // lemma.casePattern.resize(prefixLength, true); | 133 | // lemma.casePattern.resize(prefixLength, true); |
145 | break; | 134 | break; |
@@ -150,21 +139,40 @@ private: | @@ -150,21 +139,40 @@ private: | ||
150 | uint8_t idx = *ptr; | 139 | uint8_t idx = *ptr; |
151 | ptr++; | 140 | ptr++; |
152 | // lemma.casePattern[idx] = true; | 141 | // lemma.casePattern[idx] = true; |
153 | - lemma.casePattern.resize(idx + 1, false); | ||
154 | - lemma.casePattern[idx] = true; | 142 | + encodedForm.casePattern.resize(idx + 1, false); |
143 | + encodedForm.casePattern[idx] = true; | ||
155 | } | 144 | } |
156 | break; | 145 | break; |
157 | } | 146 | } |
158 | } | 147 | } |
159 | - | ||
160 | - EncodedInterpretation decodeInterp(const unsigned char*& ptr) const { | ||
161 | - EncodedInterpretation interp; | ||
162 | - decodeLemma(ptr, interp.value); | ||
163 | - interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr))); | ||
164 | - ptr += 2; | ||
165 | - interp.nameClassifier = *ptr; | ||
166 | - ptr++; | ||
167 | - return interp; | 148 | +private: |
149 | + | ||
150 | + MorphInterpretation decodeMorphInterpretation( | ||
151 | + unsigned int startNode, unsigned int endNode, | ||
152 | + const string& orth, | ||
153 | + const string& lemmaPrefix, | ||
154 | + const InterpretedChunk& chunk, | ||
155 | + const unsigned char*& ptr) const { | ||
156 | + string lemma = lemmaPrefix; | ||
157 | + EncodedInterpretation ei = this->deserializeInterp(ptr); | ||
158 | + this->decodeForm(chunk.lowercaseCodepoints, ei.value, lemma); | ||
159 | + return MorphInterpretation( | ||
160 | + startNode, endNode, | ||
161 | + orth, lemma, | ||
162 | + ei.tag, | ||
163 | + ei.nameClassifier, | ||
164 | + env.getTagset(), | ||
165 | + env.getCharsetConverter()); | ||
166 | + } | ||
167 | + | ||
168 | + void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orth, std::string& lemmaPrefix) const { | ||
169 | + for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | ||
170 | + const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | ||
171 | + orth += env.getCharsetConverter().toString(prefixChunk.originalCodepoints); | ||
172 | + const unsigned char* ptr = prefixChunk.interpsGroup.ptr; | ||
173 | + MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orth, string(""), prefixChunk, ptr); | ||
174 | + lemmaPrefix += mi.getLemma(); | ||
175 | + } | ||
168 | } | 176 | } |
169 | }; | 177 | }; |
170 | 178 | ||
@@ -173,35 +181,51 @@ public: | @@ -173,35 +181,51 @@ public: | ||
173 | 181 | ||
174 | InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) { | 182 | InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) { |
175 | } | 183 | } |
176 | - | 184 | + |
177 | void decode( | 185 | void decode( |
178 | unsigned int startNode, | 186 | unsigned int startNode, |
179 | unsigned int endNode, | 187 | unsigned int endNode, |
180 | const InterpretedChunk& interpretedChunk, | 188 | const InterpretedChunk& interpretedChunk, |
181 | std::vector<MorphInterpretation>& out) const { | 189 | std::vector<MorphInterpretation>& out) const { |
182 | - // string orth; | ||
183 | - // string lemma; | ||
184 | - // convertPrefixes(interpretedChunk, lemma, orth); | ||
185 | - // size_t orthLength = orth.length(); | ||
186 | - // lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | ||
187 | - // for (unsigned int i = 0; i < interpretedChunk.interpsGroup.interps.size(); i++) { | ||
188 | - // const EncodedInterpretation& ei = interpretedChunk.interpsGroup.interps[i]; | ||
189 | - // decodeForm( | ||
190 | - // interpretedChunk.originalCodepoints, | ||
191 | - // ei.value, | ||
192 | - // orth); | ||
193 | - // out.push_back(MorphInterpretation( | ||
194 | - // startNode, endNode, | ||
195 | - // orth, lemma, | ||
196 | - // ei.tag, | ||
197 | - // ei.nameClassifier, | ||
198 | - // env.getTagset(), | ||
199 | - // env.getCharsetConverter())); | ||
200 | - // orth.erase(orthLength); | ||
201 | - // } | 190 | + string orthPrefix; |
191 | + string lemma; | ||
192 | + convertPrefixes(interpretedChunk, orthPrefix, lemma); | ||
193 | + lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | ||
194 | + const unsigned char* currPtr = interpretedChunk.interpsGroup.ptr; | ||
195 | + while (currPtr - interpretedChunk.interpsGroup.ptr < interpretedChunk.interpsGroup.size) { | ||
196 | + out.push_back(this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr)); | ||
197 | + } | ||
202 | } | 198 | } |
203 | 199 | ||
204 | private: | 200 | private: |
201 | + | ||
202 | + void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const { | ||
203 | + for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | ||
204 | + const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | ||
205 | + lemma += env.getCharsetConverter().toString(prefixChunk.originalCodepoints); | ||
206 | + const unsigned char* ptr = prefixChunk.interpsGroup.ptr; | ||
207 | + MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr); | ||
208 | + orthPrefix += mi.getOrth(); | ||
209 | + } | ||
210 | + } | ||
211 | + | ||
212 | + MorphInterpretation decodeMorphInterpretation( | ||
213 | + unsigned int startNode, unsigned int endNode, | ||
214 | + const string& orthPrefix, | ||
215 | + const string& lemma, | ||
216 | + const InterpretedChunk& chunk, | ||
217 | + const unsigned char*& ptr) const { | ||
218 | + string orth = orthPrefix; | ||
219 | + EncodedInterpretation ei = this->deserializeInterp(ptr); | ||
220 | + this->decodeForm(chunk.originalCodepoints, ei.value, orth); | ||
221 | + return MorphInterpretation( | ||
222 | + startNode, endNode, | ||
223 | + orth, lemma, | ||
224 | + ei.tag, | ||
225 | + ei.nameClassifier, | ||
226 | + env.getTagset(), | ||
227 | + env.getCharsetConverter()); | ||
228 | + } | ||
205 | 229 | ||
206 | void decodeForm( | 230 | void decodeForm( |
207 | const vector<uint32_t>& lemma, | 231 | const vector<uint32_t>& lemma, |
@@ -218,6 +242,15 @@ private: | @@ -218,6 +242,15 @@ private: | ||
218 | env.getCharsetConverter().append(cp, res); | 242 | env.getCharsetConverter().append(cp, res); |
219 | } | 243 | } |
220 | } | 244 | } |
245 | + | ||
246 | + void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const { | ||
247 | + encodedForm.prefixToAdd = (const char*) ptr; | ||
248 | + ptr += strlen((const char*) ptr) + 1; | ||
249 | + encodedForm.suffixToCut = *ptr; | ||
250 | + ptr++; | ||
251 | + encodedForm.suffixToAdd = (const char*) ptr; | ||
252 | + ptr += strlen((const char*) ptr) + 1; | ||
253 | + } | ||
221 | }; | 254 | }; |
222 | 255 | ||
223 | #endif /* INTERPSGROUPDECODER_HPP */ | 256 | #endif /* INTERPSGROUPDECODER_HPP */ |
morfeusz/Morfeusz.cpp
@@ -40,32 +40,13 @@ options(createDefaultOptions()) { | @@ -40,32 +40,13 @@ options(createDefaultOptions()) { | ||
40 | 40 | ||
41 | void Morfeusz::setAnalyzerFile(const string& filename) { | 41 | void Morfeusz::setAnalyzerFile(const string& filename) { |
42 | this->analyzerEnv.setFSAFile(filename); | 42 | this->analyzerEnv.setFSAFile(filename); |
43 | - // if (this->isAnalyzerFSAFromFile) { | ||
44 | - // delete this->analyzerFSA; | ||
45 | - // deleteSegrulesFSAs(this->analyzerSegrulesFSAsMap); | ||
46 | - // delete this->analyzerPtr; | ||
47 | - // } | ||
48 | - // this->analyzerPtr = readFile<unsigned char>(filename.c_str()); | ||
49 | - // this->analyzerFSA = FSA< vector<InterpsGroup> > ::getFSA(analyzerPtr, *initializeAnalyzerDeserializer()); | ||
50 | - // this->analyzerSegrulesFSAsMap = createSegrulesFSAsMap(analyzerPtr); | ||
51 | - // this->isAnalyzerFSAFromFile = true; | ||
52 | } | 43 | } |
53 | 44 | ||
54 | void Morfeusz::setGeneratorFile(const string& filename) { | 45 | void Morfeusz::setGeneratorFile(const string& filename) { |
55 | this->generatorEnv.setFSAFile(filename); | 46 | this->generatorEnv.setFSAFile(filename); |
56 | - // if (this->isGeneratorFSAFromFile) { | ||
57 | - // delete this->generatorPtr; | ||
58 | - // } | ||
59 | - // this->generatorPtr = readFile<unsigned char>(filename.c_str()); | ||
60 | - // this->generator.setGeneratorPtr(generatorPtr); | ||
61 | } | 47 | } |
62 | 48 | ||
63 | Morfeusz::~Morfeusz() { | 49 | Morfeusz::~Morfeusz() { |
64 | - // if (this->isAnalyzerFSAFromFile) { | ||
65 | - // delete this->analyzerFSA; | ||
66 | - // deleteSegrulesFSAs(this->analyzerSegrulesFSAsMap); | ||
67 | - // delete this->analyzerPtr; | ||
68 | - // } | ||
69 | } | 50 | } |
70 | 51 | ||
71 | void Morfeusz::processOneWord( | 52 | void Morfeusz::processOneWord( |
@@ -97,7 +78,6 @@ void Morfeusz::processOneWord( | @@ -97,7 +78,6 @@ void Morfeusz::processOneWord( | ||
97 | } | 78 | } |
98 | srcNode++; | 79 | srcNode++; |
99 | } | 80 | } |
100 | - // graph.getResults(*this->tagset, results); | ||
101 | } | 81 | } |
102 | else if (inputStart != inputEnd) { | 82 | else if (inputStart != inputEnd) { |
103 | this->appendIgnotiumToResults(env, string(inputStart, currInput), startNodeNum, results); | 83 | this->appendIgnotiumToResults(env, string(inputStart, currInput), startNodeNum, results); |
@@ -140,6 +120,7 @@ void Morfeusz::doProcessOneWord( | @@ -140,6 +120,7 @@ void Morfeusz::doProcessOneWord( | ||
140 | vector<InterpsGroup> val(state.getValue()); | 120 | vector<InterpsGroup> val(state.getValue()); |
141 | for (unsigned int i = 0; i < val.size(); i++) { | 121 | for (unsigned int i = 0; i < val.size(); i++) { |
142 | InterpsGroup& ig = val[i]; | 122 | InterpsGroup& ig = val[i]; |
123 | +// cerr << "accept at '" << currInput << "' type=" << (int) ig.type << endl; | ||
143 | set<SegrulesState> newSegrulesStates; | 124 | set<SegrulesState> newSegrulesStates; |
144 | env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates); | 125 | env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates); |
145 | for ( | 126 | for ( |
@@ -147,9 +128,6 @@ void Morfeusz::doProcessOneWord( | @@ -147,9 +128,6 @@ void Morfeusz::doProcessOneWord( | ||
147 | it != newSegrulesStates.end(); | 128 | it != newSegrulesStates.end(); |
148 | ++it) { | 129 | ++it) { |
149 | SegrulesState newSegrulesState = *it; | 130 | SegrulesState newSegrulesState = *it; |
150 | -// if (newSegrulesState.shiftOrthFromPrevious) { | ||
151 | -// | ||
152 | -// } | ||
153 | InterpretedChunk ic = { | 131 | InterpretedChunk ic = { |
154 | inputData, | 132 | inputData, |
155 | originalCodepoints, | 133 | originalCodepoints, |
@@ -160,12 +138,19 @@ void Morfeusz::doProcessOneWord( | @@ -160,12 +138,19 @@ void Morfeusz::doProcessOneWord( | ||
160 | vector<InterpretedChunk>() | 138 | vector<InterpretedChunk>() |
161 | }; | 139 | }; |
162 | if (!accum.empty() && accum.back().shiftOrth) { | 140 | if (!accum.empty() && accum.back().shiftOrth) { |
141 | +// cerr << "shift orth from " << (int) accum.back().interpsGroup.type << " to " << (int) ig.type << endl; | ||
163 | doShiftOrth(accum.back(), ic); | 142 | doShiftOrth(accum.back(), ic); |
164 | } | 143 | } |
165 | accum.push_back(ic); | 144 | accum.push_back(ic); |
166 | if (isEndOfWord(codepoint)) { | 145 | if (isEndOfWord(codepoint)) { |
167 | - if (newSegrulesState.accepting) | 146 | +// cerr << "end of word" << endl; |
147 | + if (newSegrulesState.accepting) { | ||
148 | +// cerr << "accept " << (int) ig.type << endl; | ||
168 | graph.addPath(accum); | 149 | graph.addPath(accum); |
150 | + } | ||
151 | + else { | ||
152 | +// cerr << "not accept " << (int) ig.type << endl; | ||
153 | + } | ||
169 | } | 154 | } |
170 | else { | 155 | else { |
171 | const char* newCurrInput = currInput; | 156 | const char* newCurrInput = currInput; |
@@ -190,8 +175,6 @@ void Morfeusz::appendIgnotiumToResults( | @@ -190,8 +175,6 @@ void Morfeusz::appendIgnotiumToResults( | ||
190 | } | 175 | } |
191 | 176 | ||
192 | ResultsIterator Morfeusz::analyze(const string& text) const { | 177 | ResultsIterator Morfeusz::analyze(const string& text) const { |
193 | - // const char* textStart = text.c_str(); | ||
194 | - // const char* textEnd = text.c_str() + text.length(); | ||
195 | vector<MorphInterpretation> res; | 178 | vector<MorphInterpretation> res; |
196 | this->analyze(text, res); | 179 | this->analyze(text, res); |
197 | return ResultsIterator(res); | 180 | return ResultsIterator(res); |
@@ -207,29 +190,54 @@ void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) | @@ -207,29 +190,54 @@ void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) | ||
207 | } | 190 | } |
208 | 191 | ||
209 | ResultsIterator Morfeusz::generate(const string& text) const { | 192 | ResultsIterator Morfeusz::generate(const string& text) const { |
210 | - // const char* textStart = text.c_str(); | ||
211 | - // const char* textEnd = text.c_str() + text.length(); | ||
212 | vector<MorphInterpretation> res; | 193 | vector<MorphInterpretation> res; |
213 | this->generate(text, res); | 194 | this->generate(text, res); |
214 | return ResultsIterator(res); | 195 | return ResultsIterator(res); |
215 | } | 196 | } |
216 | 197 | ||
217 | -void Morfeusz::generate(const string& text, vector<MorphInterpretation>& results) const { | ||
218 | - const char* input = text.c_str(); | ||
219 | - const char* inputEnd = input + text.length(); | 198 | +ResultsIterator Morfeusz::generate(const string& text, int tagnum) const { |
199 | + vector<MorphInterpretation> res; | ||
200 | + this->generate(text, tagnum, res); | ||
201 | + return ResultsIterator(res); | ||
202 | +} | ||
203 | + | ||
204 | +void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& results) const { | ||
205 | + const char* input = lemma.c_str(); | ||
206 | + const char* inputEnd = input + lemma.length(); | ||
220 | while (input != inputEnd) { | 207 | while (input != inputEnd) { |
221 | int startNode = results.empty() ? 0 : results.back().getEndNode(); | 208 | int startNode = results.empty() ? 0 : results.back().getEndNode(); |
222 | this->processOneWord(this->generatorEnv, input, inputEnd, startNode, results); | 209 | this->processOneWord(this->generatorEnv, input, inputEnd, startNode, results); |
223 | } | 210 | } |
224 | } | 211 | } |
225 | 212 | ||
213 | +// XXX - generates all forms of the lemma and only then filters them by tagnum; should be improved someday | ||
214 | +void Morfeusz::generate(const std::string& lemma, int tagnum, vector<MorphInterpretation>& result) const { | ||
215 | + vector<MorphInterpretation> partRes; | ||
216 | + this->generate(lemma, partRes); | ||
217 | + for (unsigned int i = 0; i < partRes.size(); i++) { | ||
218 | + if (partRes[i].getTagnum() == tagnum) { | ||
219 | + result.push_back(partRes[i]); | ||
220 | + } | ||
221 | + } | ||
222 | +} | ||
223 | + | ||
226 | void Morfeusz::setCharset(MorfeuszCharset charset) { | 224 | void Morfeusz::setCharset(MorfeuszCharset charset) { |
227 | this->options.encoding = charset; | 225 | this->options.encoding = charset; |
228 | this->analyzerEnv.setCharset(charset); | 226 | this->analyzerEnv.setCharset(charset); |
229 | this->generatorEnv.setCharset(charset); | 227 | this->generatorEnv.setCharset(charset); |
230 | } | 228 | } |
231 | 229 | ||
232 | -ResultsIterator::ResultsIterator(vector<MorphInterpretation>& res) { | 230 | +void Morfeusz::setAggl(const std::string& aggl) { |
231 | + this->analyzerEnv.setSegrulesOption("aggl", aggl); | ||
232 | + this->generatorEnv.setSegrulesOption("aggl", aggl); | ||
233 | +} | ||
234 | + | ||
235 | +void Morfeusz::setPraet(const std::string& praet) { | ||
236 | + this->analyzerEnv.setSegrulesOption("praet", praet); | ||
237 | + this->generatorEnv.setSegrulesOption("praet", praet); | ||
238 | +} | ||
239 | + | ||
240 | +ResultsIterator::ResultsIterator(const vector<MorphInterpretation>& res) { | ||
233 | resultsBuffer.insert(resultsBuffer.begin(), res.begin(), res.end()); | 241 | resultsBuffer.insert(resultsBuffer.begin(), res.begin(), res.end()); |
234 | } | 242 | } |
235 | 243 |
morfeusz/Morfeusz.hpp
@@ -35,6 +35,12 @@ class ResultsIterator; | @@ -35,6 +35,12 @@ class ResultsIterator; | ||
35 | 35 | ||
36 | typedef State< std::vector<InterpsGroup > > StateType; | 36 | typedef State< std::vector<InterpsGroup > > StateType; |
37 | 37 | ||
38 | +/** | ||
39 | + * Performs morphological analysis (analyze methods) and synthesis (generate methods). | ||
40 | + * | ||
41 | + * It is NOT thread-safe, | ||
42 | + * but a separate Morfeusz instance can be used for each concurrent thread. | ||
43 | + */ | ||
38 | class Morfeusz { | 44 | class Morfeusz { |
39 | public: | 45 | public: |
40 | 46 | ||
@@ -57,6 +63,9 @@ public: | @@ -57,6 +63,9 @@ public: | ||
57 | */ | 63 | */ |
58 | void setGeneratorFile(const std::string& filename); | 64 | void setGeneratorFile(const std::string& filename); |
59 | 65 | ||
66 | + /** | ||
67 | + * Destroys Morfeusz object. | ||
68 | + */ | ||
60 | virtual ~Morfeusz(); | 69 | virtual ~Morfeusz(); |
61 | 70 | ||
62 | /** | 71 | /** |
@@ -82,6 +91,16 @@ public: | @@ -82,6 +91,16 @@ public: | ||
82 | * @return - iterator over morphological analysis results | 91 | * @return - iterator over morphological analysis results |
83 | */ | 92 | */ |
84 | ResultsIterator generate(const std::string& lemma) const; | 93 | ResultsIterator generate(const std::string& lemma) const; |
94 | + | ||
95 | + /** | ||
96 | + * Perform morphological synthesis on a given lemma and return the results as iterator. | ||
97 | + * Limit results to interpretations with the specified tag. | ||
98 | + * | ||
99 | + * @param lemma - lemma for morphological synthesis | ||
100 | + * @param tagnum - tag number of result interpretations | ||
101 | + * @return - iterator over morphological synthesis results | ||
102 | + */ | ||
103 | + ResultsIterator generate(const std::string& lemma, int tagnum) const; | ||
85 | 104 | ||
86 | /** | 105 | /** |
87 | * Perform morphological synthesis on a given lemma and put results in a vector. | 106 | * Perform morphological synthesis on a given lemma and put results in a vector. |
@@ -90,6 +109,16 @@ public: | @@ -90,6 +109,16 @@ public: | ||
90 | * @param result - results vector | 109 | * @param result - results vector |
91 | */ | 110 | */ |
92 | void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const; | 111 | void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const; |
112 | + | ||
113 | + /** | ||
114 | + * Perform morphological synthesis on a given lemma and put results in a vector. | ||
115 | + * Limit results to interpretations with the specified tag. | ||
116 | + * | ||
117 | + * @param lemma - lemma to be synthesized | ||
118 | + * @param tagnum - tag number of result interpretations | ||
119 | + * @param result - results vector | ||
120 | + */ | ||
121 | + void generate(const std::string& lemma, int tagnum, std::vector<MorphInterpretation>& result) const; | ||
93 | 122 | ||
94 | /** | 123 | /** |
95 | * Set encoding for input and output string objects. | 124 | * Set encoding for input and output string objects. |
@@ -97,6 +126,20 @@ public: | @@ -97,6 +126,20 @@ public: | ||
97 | * @param encoding | 126 | * @param encoding |
98 | */ | 127 | */ |
99 | void setCharset(MorfeuszCharset encoding); | 128 | void setCharset(MorfeuszCharset encoding); |
129 | + | ||
130 | + /** | ||
131 | + * Set aggl segmentation option value. | ||
132 | + * | ||
133 | + * @param aggl | ||
134 | + */ | ||
135 | + void setAggl(const std::string& aggl); | ||
136 | + | ||
137 | + /** | ||
138 | + * Set praet segmentation option value. | ||
139 | + * | ||
140 | + * @param praet | ||
141 | + */ | ||
142 | + void setPraet(const std::string& praet); | ||
100 | 143 | ||
101 | friend class ResultsIterator; | 144 | friend class ResultsIterator; |
102 | private: | 145 | private: |
@@ -121,19 +164,9 @@ private: | @@ -121,19 +164,9 @@ private: | ||
121 | const std::string& word, | 164 | const std::string& word, |
122 | int startNodeNum, | 165 | int startNodeNum, |
123 | std::vector<MorphInterpretation>& results) const; | 166 | std::vector<MorphInterpretation>& results) const; |
167 | + | ||
124 | Environment analyzerEnv; | 168 | Environment analyzerEnv; |
125 | Environment generatorEnv; | 169 | Environment generatorEnv; |
126 | -// const unsigned char* analyzerPtr; | ||
127 | -// FSAType* analyzerFSA; | ||
128 | -// std::map<SegrulesOptions, SegrulesFSA*> analyzerSegrulesFSAsMap; | ||
129 | -// SegrulesFSA* currAnalyzerSegrulesFSA; | ||
130 | -// bool isAnalyzerFSAFromFile; | ||
131 | -// | ||
132 | -// const unsigned char* generatorPtr; | ||
133 | -// FSAType* generatorFSA; | ||
134 | -// bool isGeneratorFSAFromFile; | ||
135 | -// Generator generator; | ||
136 | - | ||
137 | MorfeuszOptions options; | 170 | MorfeuszOptions options; |
138 | }; | 171 | }; |
139 | 172 | ||
@@ -143,7 +176,7 @@ public: | @@ -143,7 +176,7 @@ public: | ||
143 | bool hasNext(); | 176 | bool hasNext(); |
144 | friend class Morfeusz; | 177 | friend class Morfeusz; |
145 | private: | 178 | private: |
146 | - ResultsIterator(vector<MorphInterpretation>& res); | 179 | + ResultsIterator(const std::vector<MorphInterpretation>& res); |
147 | const char* rawInput; | 180 | const char* rawInput; |
148 | std::list<MorphInterpretation> resultsBuffer; | 181 | std::list<MorphInterpretation> resultsBuffer; |
149 | int startNode; | 182 | int startNode; |
morfeusz/cli/cli.hpp
0 → 100644
1 | +/* | ||
2 | + * File: cli.hpp | ||
3 | + * Author: mlenart | ||
4 | + * | ||
5 | + * Created on 17 marzec 2014, 18:32 | ||
6 | + */ | ||
7 | + | ||
8 | +#ifndef CLI_HPP | ||
9 | +#define CLI_HPP | ||
10 | + | ||
11 | +#ifdef _WIN64 | ||
12 | +#define TMPDUPA_IN IN | ||
13 | +#define IN IN | ||
14 | +#else | ||
15 | +#ifdef _WIN32 | ||
16 | +#define TMPDUPA_IN IN | ||
17 | +#define IN IN | ||
18 | +#endif | ||
19 | +#endif | ||
20 | + | ||
21 | +#include <iostream> | ||
22 | + | ||
23 | +#pragma GCC diagnostic push | ||
24 | +#pragma GCC diagnostic ignored "-Wsign-compare" | ||
25 | +#pragma GCC diagnostic ignored "-Wpedantic" | ||
26 | +#pragma GCC diagnostic ignored "-Wunused-variable" | ||
27 | +#pragma GCC diagnostic ignored "-Wconversion" | ||
28 | +#pragma GCC diagnostic ignored "-Wreorder" | ||
29 | +#pragma GCC diagnostic ignored "-Wlong-long" | ||
30 | +#pragma GCC diagnostic ignored "-Wunused-function" | ||
31 | +#pragma GCC diagnostic ignored "-Wcast-qual" | ||
32 | +#pragma GCC diagnostic ignored "-Wparentheses" | ||
33 | +#pragma GCC diagnostic ignored "-Wformat-extra-args" | ||
34 | + | ||
35 | +#include "ezOptionParser.hpp" | ||
36 | + | ||
37 | +#pragma GCC diagnostic pop | ||
38 | + | ||
39 | +void printCLIUsage(ez::ezOptionParser& opt, std::ostream& out) { | ||
40 | + std::string usage; | ||
41 | + opt.getUsage(usage); | ||
42 | + out << usage; | ||
43 | +} | ||
44 | + | ||
45 | +#ifdef _WIN64 | ||
46 | +#define IN TMPDUPA_IN | ||
47 | +#else | ||
48 | +#ifdef _WIN32 | ||
49 | +#define IN TMPDUPA_IN | ||
50 | +#endif | ||
51 | +#endif | ||
52 | + | ||
53 | +#endif /* CLI_HPP */ | ||
54 | + |
morfeusz/cli/ezOptionParser.hpp
0 → 100644
1 | +/* | ||
2 | +This file is part of ezOptionParser. See MIT-LICENSE. | ||
3 | + | ||
4 | +Copyright (C) 2011,2012 Remik Ziemlinski <first d0t surname att gmail> | ||
5 | + | ||
6 | +CHANGELOG | ||
7 | + | ||
8 | +v0.0.0 20110505 rsz Created. | ||
9 | +v0.1.0 20111006 rsz Added validator. | ||
10 | +v0.1.1 20111012 rsz Fixed validation of ulonglong. | ||
11 | +v0.1.2 20111126 rsz Allow flag names start with alphanumeric (previously, flag had to start with alpha). | ||
12 | +v0.1.3 20120108 rsz Created work-around for unique id generation with IDGenerator that avoids retarded c++ translation unit linker errors with single-header static variables. Forced inline on all methods to please retard compiler and avoid multiple def errors. | ||
13 | +v0.1.4 20120629 Enforced MIT license on all files. | ||
14 | +v0.2.0 20121120 Added parseIndex to OptionGroup. | ||
15 | +v0.2.1 20130506 Allow disabling doublespace of OPTIONS usage descriptions. | ||
16 | +*/ | ||
17 | +#ifndef EZ_OPTION_PARSER_H | ||
18 | +#define EZ_OPTION_PARSER_H | ||
19 | + | ||
20 | +#include <stdlib.h> | ||
21 | +#include <vector> | ||
22 | +#include <list> | ||
23 | +#include <map> | ||
24 | +#include <string> | ||
25 | +#include <iostream> | ||
26 | +#include <fstream> | ||
27 | +#include <algorithm> | ||
28 | +#include <limits> | ||
29 | +#include <sstream> | ||
30 | +#include <cstring> | ||
31 | + | ||
32 | +namespace ez { | ||
33 | +#define DEBUGLINE() printf("%s:%d\n", __FILE__, __LINE__); | ||
34 | + | ||
35 | +/* ################################################################### */ | ||
36 | +template<typename T> | ||
37 | +static T fromString(const std::string* s) { | ||
38 | + std::istringstream stream (s->c_str()); | ||
39 | + T t; | ||
40 | + stream >> t; | ||
41 | + return t; | ||
42 | +}; | ||
43 | +template<typename T> | ||
44 | +static T fromString(const char* s) { | ||
45 | + std::istringstream stream (s); | ||
46 | + T t; | ||
47 | + stream >> t; | ||
48 | + return t; | ||
49 | +}; | ||
50 | +/* ################################################################### */ | ||
51 | +static bool isdigit(const std::string & s, int i=0) { | ||
52 | + int n = s.length(); | ||
53 | + for(; i < n; ++i) | ||
54 | + switch(s[i]) { | ||
55 | + case '0': case '1': case '2': | ||
56 | + case '3': case '4': case '5': | ||
57 | + case '6': case '7': case '8': case '9': break; | ||
58 | + default: return false; | ||
59 | + } | ||
60 | + | ||
61 | + return true; | ||
62 | +}; | ||
63 | +/* ################################################################### */ | ||
64 | +static bool isdigit(const std::string * s, int i=0) { | ||
65 | + int n = s->length(); | ||
66 | + for(; i < n; ++i) | ||
67 | + switch(s->at(i)) { | ||
68 | + case '0': case '1': case '2': | ||
69 | + case '3': case '4': case '5': | ||
70 | + case '6': case '7': case '8': case '9': break; | ||
71 | + default: return false; | ||
72 | + } | ||
73 | + | ||
74 | + return true; | ||
75 | +}; | ||
76 | +/* ################################################################### */ | ||
77 | +/* | ||
78 | +Compare strings for opts, so short opt flags come before long format flags. | ||
79 | +For example, -d < --dimension < --dmn, and also lower come before upper. The default STL std::string compare doesn't do that. | ||
80 | +*/ | ||
81 | +static bool CmpOptStringPtr(std::string * s1, std::string * s2) { | ||
82 | + int c1,c2; | ||
83 | + const char *s=s1->c_str(); | ||
84 | + for(c1=0; c1 < s1->size(); ++c1) | ||
85 | + if (isalnum(s[c1])) // locale sensitive. | ||
86 | + break; | ||
87 | + | ||
88 | + s=s2->c_str(); | ||
89 | + for(c2=0; c2 < s2->size(); ++c2) | ||
90 | + if (isalnum(s[c2])) | ||
91 | + break; | ||
92 | + | ||
93 | + // Test which has more symbols before its name. | ||
94 | + if (c1 > c2) | ||
95 | + return false; | ||
96 | + else if (c1 < c2) | ||
97 | + return true; | ||
98 | + | ||
99 | + // Both have same number of symbols, so compare first letter. | ||
100 | + char char1 = s1->at(c1); | ||
101 | + char char2 = s2->at(c2); | ||
102 | + char lo1 = tolower(char1); | ||
103 | + char lo2 = tolower(char2); | ||
104 | + | ||
105 | + if (lo1 != lo2) | ||
106 | + return lo1 < lo2; | ||
107 | + | ||
108 | + // Their case doesn't match, so find which is lower. | ||
109 | + char up1 = isupper(char1); | ||
110 | + char up2 = isupper(char2); | ||
111 | + | ||
112 | + if (up1 && !up2) | ||
113 | + return false; | ||
114 | + else if (!up1 && up2) | ||
115 | + return true; | ||
116 | + | ||
117 | + return (s1->compare(*s2)<0); | ||
118 | +}; | ||
119 | +/* ################################################################### */ | ||
120 | +/* | ||
121 | +Makes a vector of strings from one string, | ||
122 | +splitting at (and excluding) delimiter "token". | ||
123 | +*/ | ||
124 | +static void SplitDelim( const std::string& s, const char token, std::vector<std::string*> * result) { | ||
125 | + std::string::const_iterator i = s.begin(); | ||
126 | + std::string::const_iterator j = s.begin(); | ||
127 | + const std::string::const_iterator e = s.end(); | ||
128 | + | ||
129 | + while(i!=e) { | ||
130 | + while(i!=e && *i++!=token); | ||
131 | + std::string *newstr = new std::string(j, i); | ||
132 | + if (newstr->at(newstr->size()-1) == token) newstr->erase(newstr->size()-1); | ||
133 | + result->push_back(newstr); | ||
134 | + j = i; | ||
135 | + } | ||
136 | +}; | ||
137 | +/* ################################################################### */ | ||
// Value-based variant: appends copies of the pieces of "s" (split at "token",
// delimiter excluded) to "result". A delimiter that ends "s" yields no
// trailing empty piece; an empty "s" appends nothing.
static void SplitDelim( const std::string& s, const char token, std::vector<std::string> & result) {
  const std::string::size_type total = s.size();
  std::string::size_type start = 0;

  while (start < total) {
    const std::string::size_type hit = s.find(token, start);
    if (hit == std::string::npos) {
      // No delimiter left: the remainder is the last piece.
      result.push_back(s.substr(start));
      break;
    }
    result.push_back(s.substr(start, hit - start));
    start = hit + 1;
  }
}
152 | +/* ################################################################### */ | ||
// List-based variant: appends each piece of "s" (split at "token", delimiter
// excluded) to "result" as a newly allocated string owned by the caller.
// A delimiter that ends "s" yields no trailing empty piece.
static void SplitDelim( const std::string& s, const char token, std::list<std::string*> & result) {
  const std::string::size_type total = s.size();
  std::string::size_type start = 0;

  while (start < total) {
    const std::string::size_type hit = s.find(token, start);
    if (hit == std::string::npos) {
      // No delimiter left: the remainder is the last piece.
      result.push_back(new std::string(s, start));
      break;
    }
    result.push_back(new std::string(s, start, hit - start));
    start = hit + 1;
  }
}
167 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with atoi() and stores them in "out" as unsigned char. */
static void ToU1(std::string ** strings, unsigned char * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = static_cast<unsigned char>(atoi((*strings)->c_str()));
  }
}
173 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with atoi() and stores them in "out" as char. */
static void ToS1(std::string ** strings, char * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = static_cast<char>(atoi((*strings)->c_str()));
  }
}
179 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with atoi() and stores them in "out" as unsigned short. */
static void ToU2(std::string ** strings, unsigned short * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = static_cast<unsigned short>(atoi((*strings)->c_str()));
  }
}
185 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with atoi() and stores them in "out" as short. */
static void ToS2(std::string ** strings, short * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = static_cast<short>(atoi((*strings)->c_str()));
  }
}
191 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with atoi() and stores the results in "out". */
static void ToS4(std::string ** strings, int * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = atoi((*strings)->c_str());
  }
}
197 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with strtoul() and stores them in "out".
   Base 0 is passed, so a "0x" prefix is read as hexadecimal and a leading
   "0" as octal. */
static void ToU4(std::string ** strings, unsigned int * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = static_cast<unsigned int>(strtoul((*strings)->c_str(), NULL, 0));
  }
}
203 | +/* ################################################################### */ | ||
/* Parses the first "n" strings as long long through a string stream and stores them in "out". */
static void ToS8(std::string ** strings, long long * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    std::stringstream parser((*strings)->c_str());
    parser >> *out;
  }
}
210 | +/* ################################################################### */ | ||
/* Parses the first "n" strings as unsigned long long through a string stream and stores them in "out". */
static void ToU8(std::string ** strings, unsigned long long * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    std::stringstream parser((*strings)->c_str());
    parser >> *out;
  }
}
217 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with atof() and stores them in "out" narrowed to float. */
static void ToF(std::string ** strings, float * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = static_cast<float>(atof((*strings)->c_str()));
  }
}
223 | +/* ################################################################### */ | ||
/* Converts the first "n" strings with atof() and stores the results in "out". */
static void ToD(std::string ** strings, double * out, int n) {
  for (int left = n; left > 0; --left, ++strings, ++out) {
    *out = atof((*strings)->c_str());
  }
}
229 | +/* ################################################################### */ | ||
/* Appends the atoi() conversion of every string in "strings" to "out". */
static void StringsToInts(std::vector<std::string> & strings, std::vector<int> & out) {
  std::vector<std::string>::const_iterator it = strings.begin();
  const std::vector<std::string>::const_iterator last = strings.end();
  for (; it != last; ++it) {
    out.push_back(atoi(it->c_str()));
  }
}
235 | +/* ################################################################### */ | ||
/* Pointer variant: appends the atoi() conversion of every pointed-to string to "*out". */
static void StringsToInts(std::vector<std::string*> * strings, std::vector<int> * out) {
  std::vector<std::string*>::const_iterator it = strings->begin();
  const std::vector<std::string*>::const_iterator last = strings->end();
  for (; it != last; ++it) {
    out->push_back(atoi((*it)->c_str()));
  }
}
241 | +/* ################################################################### */ | ||
/* Appends the atol() conversion of every string in "strings" to "out". */
static void StringsToLongs(std::vector<std::string> & strings, std::vector<long> & out) {
  std::vector<std::string>::const_iterator it = strings.begin();
  const std::vector<std::string>::const_iterator last = strings.end();
  for (; it != last; ++it) {
    out.push_back(atol(it->c_str()));
  }
}
247 | +/* ################################################################### */ | ||
/* Pointer variant: appends the atol() conversion of every pointed-to string to "*out". */
static void StringsToLongs(std::vector<std::string*> * strings, std::vector<long> * out) {
  std::vector<std::string*>::const_iterator it = strings->begin();
  const std::vector<std::string*>::const_iterator last = strings->end();
  for (; it != last; ++it) {
    out->push_back(atol((*it)->c_str()));
  }
}
253 | +/* ################################################################### */ | ||
/* Appends the strtoul() conversion (base 0, so "0x" and leading-"0" prefixes
   are honoured) of every string in "strings" to "out". */
static void StringsToULongs(std::vector<std::string> & strings, std::vector<unsigned long> & out) {
  std::vector<std::string>::const_iterator it = strings.begin();
  const std::vector<std::string>::const_iterator last = strings.end();
  for (; it != last; ++it) {
    out.push_back(strtoul(it->c_str(), 0, 0));
  }
}
259 | +/* ################################################################### */ | ||
/* Pointer variant: appends the strtoul() conversion (base 0) of every
   pointed-to string to "*out". */
static void StringsToULongs(std::vector<std::string*> * strings, std::vector<unsigned long> * out) {
  std::vector<std::string*>::const_iterator it = strings->begin();
  const std::vector<std::string*>::const_iterator last = strings->end();
  for (; it != last; ++it) {
    out->push_back(strtoul((*it)->c_str(), 0, 0));
  }
}
265 | +/* ################################################################### */ | ||
/* Appends the atof() conversion of every string in "strings" to "out", narrowed to float. */
static void StringsToFloats(std::vector<std::string> & strings, std::vector<float> & out) {
  std::vector<std::string>::const_iterator it = strings.begin();
  const std::vector<std::string>::const_iterator last = strings.end();
  for (; it != last; ++it) {
    out.push_back(static_cast<float>(atof(it->c_str())));
  }
}
271 | +/* ################################################################### */ | ||
/* Pointer variant: appends the atof() conversion of every pointed-to string
   to "*out", narrowed to float. */
static void StringsToFloats(std::vector<std::string*> * strings, std::vector<float> * out) {
  std::vector<std::string*>::const_iterator it = strings->begin();
  const std::vector<std::string*>::const_iterator last = strings->end();
  for (; it != last; ++it) {
    out->push_back(static_cast<float>(atof((*it)->c_str())));
  }
}
277 | +/* ################################################################### */ | ||
/* Appends the atof() conversion of every string in "strings" to "out". */
static void StringsToDoubles(std::vector<std::string> & strings, std::vector<double> & out) {
  std::vector<std::string>::const_iterator it = strings.begin();
  const std::vector<std::string>::const_iterator last = strings.end();
  for (; it != last; ++it) {
    out.push_back(atof(it->c_str()));
  }
}
283 | +/* ################################################################### */ | ||
/* Pointer variant: appends the atof() conversion of every pointed-to string to "*out". */
static void StringsToDoubles(std::vector<std::string*> * strings, std::vector<double> * out) {
  std::vector<std::string*>::const_iterator it = strings->begin();
  const std::vector<std::string*>::const_iterator last = strings->end();
  for (; it != last; ++it) {
    out->push_back(atof((*it)->c_str()));
  }
}
289 | +/* ################################################################### */ | ||
/* Appends a copy of every pointed-to string in "*strings" to "*out". */
static void StringsToStrings(std::vector<std::string*> * strings, std::vector<std::string> * out) {
  std::vector<std::string*>::const_iterator it = strings->begin();
  const std::vector<std::string*>::const_iterator last = strings->end();
  for (; it != last; ++it) {
    out->push_back(**it);
  }
}
295 | +/* ################################################################### */ | ||
/* Lower-cases the ASCII letters 'A'..'Z' of "s" in place; every other
   character is left untouched. */
static void ToLowerASCII(std::string & s) {
  for (std::string::iterator ch = s.begin(); ch != s.end(); ++ch) {
    if (*ch >= 'A' && *ch <= 'Z')
      *ch = static_cast<char>(*ch + 32);  // 32 is the ASCII distance from 'A' to 'a'.
  }
}
306 | +/* ################################################################### */ | ||
/*
Splits the NUL-terminated command line "CmdLine" into arguments.

Arguments are separated by space, tab, newline or carriage return. Text
enclosed in double or single quotes is copied verbatim (separators included)
and the quote characters themselves are dropped. A quotation is closed only
by the same character that opened it, so the other quote character may appear
inside it (e.g. "it's here").

Returns a single malloc() block holding the NULL-terminated pointer table
followed by the argument text; release it with one free(). The number of
arguments is stored in "*_argc". If the allocation fails, NULL is returned
and "*_argc" is set to 0.
*/
static char** CommandLineToArgvA(char* CmdLine, int* _argc) {
  const unsigned long len = strlen(CmdLine);
  // Every argument needs at least one character plus a separator, so
  // (len+2)/2 pointer slots plus one for the NULL terminator always suffice.
  const unsigned long tableBytes = ((len+2)/2)*sizeof(void*) + sizeof(void*);

  char** argv = (char**)malloc(tableBytes + (len+2)*sizeof(char));
  if (argv == NULL) {
    (*_argc) = 0;
    return NULL;
  }

  // The argument text lives directly behind the pointer table.
  char* _argv = (char*)(((unsigned char*)argv)+tableBytes);

  unsigned long argc = 0;
  unsigned long j = 0;      // Next write position in the text area.
  char quote = '\0';        // Opening quote character, or '\0' when outside quotes.
  bool in_TEXT = false;
  bool in_SPACE = true;
  argv[argc] = _argv;

  for (unsigned long i = 0; CmdLine[i] != '\0'; ++i) {
    const char a = CmdLine[i];

    if (quote != '\0') {
      if (a == quote) {
        quote = '\0';       // Matching quote closes the quotation.
      } else {
        _argv[j] = a;
        j++;
      }
      continue;
    }

    switch(a) {
      case '\"':
      case '\'':
        quote = a;
        in_TEXT = true;
        if (in_SPACE) {
          // A quote right after a separator starts a new argument.
          argv[argc] = _argv+j;
          argc++;
        }
        in_SPACE = false;
        break;
      case ' ':
      case '\t':
      case '\n':
      case '\r':
        if (in_TEXT) {
          // Terminate the argument that just ended.
          _argv[j] = '\0';
          j++;
        }
        in_TEXT = false;
        in_SPACE = true;
        break;
      default:
        in_TEXT = true;
        if (in_SPACE) {
          argv[argc] = _argv+j;
          argc++;
        }
        _argv[j] = a;
        j++;
        in_SPACE = false;
        break;
    }
  }

  _argv[j] = '\0';
  argv[argc] = NULL;

  (*_argc) = (int)argc;
  return argv;
}
387 | +/* ################################################################### */ | ||
// Process-wide id source. The counter lives in a function-local static, so
// this header can be included from several translation units without
// producing multiple-definition linker errors.
class ezOptionParserIDGenerator {
public:
  // Returns the single shared generator, created on first use.
  static ezOptionParserIDGenerator& instance () {
    static ezOptionParserIDGenerator Generator;
    return Generator;
  }

  // Advances the counter and returns the new value; the first call yields 0.
  short next () {
    _id += 1;
    return _id;
  }

private:
  // Starts at -1 so that the first id handed out is 0.
  ezOptionParserIDGenerator() : _id(-1) {}

  short _id;
};
397 | +/* ################################################################### */ | ||
/* Validates an option value given as a string:
- checks that the converted value fits the limits of the declared datatype,
- and optionally that it lies within a custom range,
- or that it is a member of a given list of values.

For a range test the list holds one or two values: one value for <, <=, >, >=
and two values for tests such as <x<, <=x<, <x<=, <=x<=.
A regcomp/regexec based class could be created in the future if a need arises.
*/
class ezOptionValidator {
public:
  /* Convenience ctor that parses its setup from strings: datatype name,
     operation name and comma-delimited value list. */
  inline ezOptionValidator(const char* _type, const char* _op=0, const char* list=0, bool _insensitive=false);
  /* Datatype only, no comparison list. */
  inline ezOptionValidator(char _type);
  /* Typed ctors: "list" holds "_size" values that are copied into the validator. */
  inline ezOptionValidator(char _type, char _op, const char* list, int _size);
  inline ezOptionValidator(char _type, char _op, const unsigned char* list, int _size);
  inline ezOptionValidator(char _type, char _op, const short* list, int _size);
  inline ezOptionValidator(char _type, char _op, const unsigned short* list, int _size);
  inline ezOptionValidator(char _type, char _op, const int* list, int _size);
  inline ezOptionValidator(char _type, char _op, const unsigned int* list, int _size);
  inline ezOptionValidator(char _type, char _op, const long long* list, int _size);
  inline ezOptionValidator(char _type, char _op, const unsigned long long* list, int _size=0);
  inline ezOptionValidator(char _type, char _op, const float* list, int _size);
  inline ezOptionValidator(char _type, char _op, const double* list, int _size);
  /* Text list; "_insensitive" selects case-insensitive matching. */
  inline ezOptionValidator(char _type, char _op, const char** list, int _size, bool _insensitive);
  inline ~ezOptionValidator();

  /* Returns whether the value passes the datatype and list/range checks. */
  inline bool isValid(const std::string * value);
  /* Prints id, op, type, size and insensitive to stdout. */
  inline void print();
  /* Frees the stored list and clears size, op and type. */
  inline void reset();

  /* Comparison modes for a custom range or list test. */
  enum OP {
    NOOP=0,
    LT,   /* value < list[0] */
    LE,   /* value <= list[0] */
    GT,   /* value > list[0] */
    GE,   /* value >= list[0] */
    GTLT, /* list[0] < value < list[1] */
    GELT, /* list[0] <= value < list[1] */
    GELE, /* list[0] <= value <= list[1] */
    GTLE, /* list[0] < value <= list[1] */
    IN    /* if value is in list */
  };

  /* Datatype codes: S/U = signed/unsigned, digit = size in bytes, F = float, D = double, T = text. */
  enum TYPE { NOTYPE=0, S1, U1, S2, U2, S4, U4, S8, U8, F, D, T };
  /* Longer names carrying the same numeric values as TYPE. */
  enum TYPE2 { NOTYPE2=0, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT, DOUBLE, TEXT };

  /* The comparison list; "type" tells which member is in use. */
  union {
    unsigned char *u1;
    char *s1;
    unsigned short *u2;
    short *s2;
    unsigned int *u4;
    int *s4;
    unsigned long long *u8;
    long long *s8;
    float *f;
    double *d;
    std::string** t;
  };

  char op;           /* One of the OP values. */
  bool quiet;        /* When true, isValid() suppresses its datatype-limit error messages. */
  short id;          /* Taken from ezOptionParserIDGenerator at construction. */
  char type;         /* One of the TYPE values. */
  int size;          /* Number of entries in the comparison list. */
  bool insensitive;  /* Compare text values case-insensitively. */
};
464 | +/* ------------------------------------------------------------------- */ | ||
465 | +ezOptionValidator::~ezOptionValidator() { | ||
466 | + reset(); | ||
467 | +}; | ||
468 | +/* ------------------------------------------------------------------- */ | ||
469 | +void ezOptionValidator::reset() { | ||
470 | + #define CLEAR(TYPE,P) case TYPE: if (P) delete [] P; P = 0; break; | ||
471 | + switch(type) { | ||
472 | + CLEAR(S1,s1); | ||
473 | + CLEAR(U1,u1); | ||
474 | + CLEAR(S2,s2); | ||
475 | + CLEAR(U2,u2); | ||
476 | + CLEAR(S4,s4); | ||
477 | + CLEAR(U4,u4); | ||
478 | + CLEAR(S8,s8); | ||
479 | + CLEAR(U8,u8); | ||
480 | + CLEAR(F,f); | ||
481 | + CLEAR(D,d); | ||
482 | + case T: | ||
483 | + for(int i=0; i < size; ++i) | ||
484 | + delete t[i]; | ||
485 | + | ||
486 | + delete [] t; | ||
487 | + t = 0; | ||
488 | + break; | ||
489 | + default: break; | ||
490 | + } | ||
491 | + | ||
492 | + size = 0; | ||
493 | + op = NOOP; | ||
494 | + type = NOTYPE; | ||
495 | +}; | ||
496 | +/* ------------------------------------------------------------------- */ | ||
497 | +ezOptionValidator::ezOptionValidator(char _type) : insensitive(0), op(0), size(0), s1(0), type(_type), quiet(0) { | ||
498 | + id = ezOptionParserIDGenerator::instance().next(); | ||
499 | +}; | ||
500 | +/* ------------------------------------------------------------------- */ | ||
501 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const char* list, int _size) : insensitive(0), op(_op), size(_size), s1(0), type(_type), quiet(0) { | ||
502 | + id = ezOptionParserIDGenerator::instance().next(); | ||
503 | + s1 = new char[size]; | ||
504 | + memcpy(s1, list, size); | ||
505 | +}; | ||
506 | +/* ------------------------------------------------------------------- */ | ||
507 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned char* list, int _size) : insensitive(0), op(_op), size(_size), u1(0), type(_type), quiet(0) { | ||
508 | + id = ezOptionParserIDGenerator::instance().next(); | ||
509 | + u1 = new unsigned char[size]; | ||
510 | + memcpy(u1, list, size); | ||
511 | +}; | ||
512 | +/* ------------------------------------------------------------------- */ | ||
513 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const short* list, int _size) : insensitive(0), op(_op), size(_size), s2(0), type(_type), quiet(0) { | ||
514 | + id = ezOptionParserIDGenerator::instance().next(); | ||
515 | + s2 = new short[size]; | ||
516 | + memcpy(s2, list, size*sizeof(short)); | ||
517 | +}; | ||
518 | +/* ------------------------------------------------------------------- */ | ||
519 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned short* list, int _size) : insensitive(0), op(_op), size(_size), u2(0), type(_type), quiet(0) { | ||
520 | + id = ezOptionParserIDGenerator::instance().next(); | ||
521 | + u2 = new unsigned short[size]; | ||
522 | + memcpy(u2, list, size*sizeof(unsigned short)); | ||
523 | +}; | ||
524 | +/* ------------------------------------------------------------------- */ | ||
525 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const int* list, int _size) : insensitive(0), op(_op), size(_size), s4(0), type(_type), quiet(0) { | ||
526 | + id = ezOptionParserIDGenerator::instance().next(); | ||
527 | + s4 = new int[size]; | ||
528 | + memcpy(s4, list, size*sizeof(int)); | ||
529 | +}; | ||
530 | +/* ------------------------------------------------------------------- */ | ||
531 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned int* list, int _size) : insensitive(0), op(_op), size(_size), u4(0), type(_type), quiet(0) { | ||
532 | + id = ezOptionParserIDGenerator::instance().next(); | ||
533 | + u4 = new unsigned int[size]; | ||
534 | + memcpy(u4, list, size*sizeof(unsigned int)); | ||
535 | +}; | ||
536 | +/* ------------------------------------------------------------------- */ | ||
537 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const long long* list, int _size) : insensitive(0), op(_op), size(_size), s8(0), type(_type), quiet(0) { | ||
538 | + id = ezOptionParserIDGenerator::instance().next(); | ||
539 | + s8 = new long long[size]; | ||
540 | + memcpy(s8, list, size*sizeof(long long)); | ||
541 | +}; | ||
542 | +/* ------------------------------------------------------------------- */ | ||
543 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned long long* list, int _size) : insensitive(0), op(_op), size(_size), u8(0), type(_type), quiet(0) { | ||
544 | + id = ezOptionParserIDGenerator::instance().next(); | ||
545 | + u8 = new unsigned long long[size]; | ||
546 | + memcpy(u8, list, size*sizeof(unsigned long long)); | ||
547 | +}; | ||
548 | +/* ------------------------------------------------------------------- */ | ||
549 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const float* list, int _size) : insensitive(0), op(_op), size(_size), f(0), type(_type), quiet(0) { | ||
550 | + id = ezOptionParserIDGenerator::instance().next(); | ||
551 | + f = new float[size]; | ||
552 | + memcpy(f, list, size*sizeof(float)); | ||
553 | +}; | ||
554 | +/* ------------------------------------------------------------------- */ | ||
555 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const double* list, int _size) : insensitive(0), op(_op), size(_size), d(0), type(_type), quiet(0) { | ||
556 | + id = ezOptionParserIDGenerator::instance().next(); | ||
557 | + d = new double[size]; | ||
558 | + memcpy(d, list, size*sizeof(double)); | ||
559 | +}; | ||
560 | +/* ------------------------------------------------------------------- */ | ||
561 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const char** list, int _size, bool _insensitive) : insensitive(_insensitive), op(_op), size(_size), t(0), type(_type), quiet(0) { | ||
562 | + id = ezOptionParserIDGenerator::instance().next(); | ||
563 | + t = new std::string*[size]; | ||
564 | + int i=0; | ||
565 | + | ||
566 | + for(; i < size; ++i) { | ||
567 | + t[i] = new std::string(list[i]); | ||
568 | + } | ||
569 | +}; | ||
570 | +/* ------------------------------------------------------------------- */ | ||
571 | +/* Less efficient but convenient ctor that parses strings to setup validator. | ||
572 | +_type: s1, u1, s2, u2, ..., f, d, t | ||
573 | +_op: lt, gt, ..., in | ||
574 | +_list: comma-delimited string | ||
575 | +*/ | ||
576 | +ezOptionValidator::ezOptionValidator(const char* _type, const char* _op, const char* _list, bool _insensitive) : insensitive(_insensitive), size(0), t(0), type(0), quiet(0) { | ||
577 | + id = ezOptionParserIDGenerator::instance().next(); | ||
578 | + | ||
579 | + switch(_type[0]) { | ||
580 | + case 'u': | ||
581 | + switch(_type[1]) { | ||
582 | + case '1': type = U1; break; | ||
583 | + case '2': type = U2; break; | ||
584 | + case '4': type = U4; break; | ||
585 | + case '8': type = U8; break; | ||
586 | + default: break; | ||
587 | + } | ||
588 | + break; | ||
589 | + case 's': | ||
590 | + switch(_type[1]) { | ||
591 | + case '1': type = S1; | ||
592 | + break; | ||
593 | + case '2': type = S2; break; | ||
594 | + case '4': type = S4; break; | ||
595 | + case '8': type = S8; break; | ||
596 | + default: break; | ||
597 | + } | ||
598 | + break; | ||
599 | + case 'f': type = F; break; | ||
600 | + case 'd': type = D; break; | ||
601 | + case 't': type = T; break; | ||
602 | + default: | ||
603 | + if (!quiet) | ||
604 | + std::cerr << "ERROR: Unknown validator datatype \"" << _type << "\".\n"; | ||
605 | + break; | ||
606 | + } | ||
607 | + | ||
608 | + int nop = 0; | ||
609 | + if (_op != 0) | ||
610 | + nop = strlen(_op); | ||
611 | + | ||
612 | + switch(nop) { | ||
613 | + case 0: op = NOOP; break; | ||
614 | + case 2: | ||
615 | + switch(_op[0]) { | ||
616 | + case 'g': | ||
617 | + switch(_op[1]) { | ||
618 | + case 'e': op = GE; break; | ||
619 | + default: op = GT; break; | ||
620 | + } | ||
621 | + break; | ||
622 | + case 'i': op = IN; | ||
623 | + break; | ||
624 | + default: | ||
625 | + switch(_op[1]) { | ||
626 | + case 'e': op = LE; break; | ||
627 | + default: op = LT; break; | ||
628 | + } | ||
629 | + break; | ||
630 | + } | ||
631 | + break; | ||
632 | + case 4: | ||
633 | + switch(_op[1]) { | ||
634 | + case 'e': | ||
635 | + switch(_op[3]) { | ||
636 | + case 'e': op = GELE; break; | ||
637 | + default: op = GELT; break; | ||
638 | + } | ||
639 | + break; | ||
640 | + default: | ||
641 | + switch(_op[3]) { | ||
642 | + case 'e': op = GTLE; break; | ||
643 | + default: op = GTLT; break; | ||
644 | + } | ||
645 | + break; | ||
646 | + } | ||
647 | + break; | ||
648 | + default: | ||
649 | + if (!quiet) | ||
650 | + std::cerr << "ERROR: Unknown validator operation \"" << _op << "\".\n"; | ||
651 | + break; | ||
652 | + } | ||
653 | + | ||
654 | + if (_list == 0) return; | ||
655 | + // Create list of strings and then cast to native datatypes. | ||
656 | + std::string unsplit(_list); | ||
657 | + std::list<std::string*> split; | ||
658 | + std::list<std::string*>::iterator it; | ||
659 | + SplitDelim(unsplit, ',', split); | ||
660 | + size = split.size(); | ||
661 | + std::string **strings = new std::string*[size]; | ||
662 | + | ||
663 | + int i = 0; | ||
664 | + for(it = split.begin(); it != split.end(); ++it) | ||
665 | + strings[i++] = *it; | ||
666 | + | ||
667 | + if (insensitive) | ||
668 | + for(i=0; i < size; ++i) | ||
669 | + ToLowerASCII(*strings[i]); | ||
670 | + | ||
671 | + #define FreeStrings() { \ | ||
672 | + for(i=0; i < size; ++i)\ | ||
673 | + delete strings[i];\ | ||
674 | + delete [] strings;\ | ||
675 | + } | ||
676 | + | ||
677 | + #define ToArray(T,P,Y) case T: P = new Y[size]; To##T(strings, P, size); FreeStrings(); break; | ||
678 | + switch(type) { | ||
679 | + ToArray(S1,s1,char); | ||
680 | + ToArray(U1,u1,unsigned char); | ||
681 | + ToArray(S2,s2,short); | ||
682 | + ToArray(U2,u2,unsigned short); | ||
683 | + ToArray(S4,s4,int); | ||
684 | + ToArray(U4,u4,unsigned int); | ||
685 | + ToArray(S8,s8,long long); | ||
686 | + ToArray(U8,u8,unsigned long long); | ||
687 | + ToArray(F,f,float); | ||
688 | + ToArray(D,d,double); | ||
689 | + case T: t = strings; break; /* Don't erase strings array. */ | ||
690 | + default: break; | ||
691 | + } | ||
692 | +}; | ||
693 | +/* ------------------------------------------------------------------- */ | ||
694 | +void ezOptionValidator::print() { | ||
695 | + printf("id=%d, op=%d, type=%d, size=%d, insensitive=%d\n", id, op, type, size, insensitive); | ||
696 | +}; | ||
697 | +/* ------------------------------------------------------------------- */ | ||
698 | +bool ezOptionValidator::isValid(const std::string * valueAsString) { | ||
699 | + if (valueAsString == 0) return false; | ||
700 | + | ||
701 | +#define CHECKRANGE(E,T) {\ | ||
702 | + std::stringstream ss(valueAsString->c_str()); \ | ||
703 | + long long E##value; \ | ||
704 | + ss >> E##value; \ | ||
705 | + long long E##min = static_cast<long long>(std::numeric_limits<T>::min()); \ | ||
706 | + if (E##value < E##min) { \ | ||
707 | + if (!quiet) \ | ||
708 | + std::cerr << "ERROR: Invalid value " << E##value << " is less than datatype min " << E##min << ".\n"; \ | ||
709 | + return false; \ | ||
710 | + } \ | ||
711 | + \ | ||
712 | + long long E##max = static_cast<long long>(std::numeric_limits<T>::max()); \ | ||
713 | + if (E##value > E##max) { \ | ||
714 | + if (!quiet) \ | ||
715 | + std::cerr << "ERROR: Invalid value " << E##value << " is greater than datatype max " << E##max << ".\n"; \ | ||
716 | + return false; \ | ||
717 | + } \ | ||
718 | +} | ||
719 | + // Check if within datatype limits. | ||
720 | + if (type != T) { | ||
721 | + switch(type) { | ||
722 | + case S1: CHECKRANGE(S1,char); break; | ||
723 | + case U1: CHECKRANGE(U1,unsigned char); break; | ||
724 | + case S2: CHECKRANGE(S2,short); break; | ||
725 | + case U2: CHECKRANGE(U2,unsigned short); break; | ||
726 | + case S4: CHECKRANGE(S4,int); break; | ||
727 | + case U4: CHECKRANGE(U4,unsigned int); break; | ||
728 | + case S8: { | ||
729 | + if ( (valueAsString->at(0) == '-') && | ||
730 | + isdigit(valueAsString,1) && | ||
731 | + (valueAsString->size() > 19) && | ||
732 | + (valueAsString->compare(1, 19, "9223372036854775808") > 0) ) { | ||
733 | + if (!quiet) | ||
734 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is less than datatype min -9223372036854775808.\n"; | ||
735 | + return false; | ||
736 | + } | ||
737 | + | ||
738 | + if (isdigit(valueAsString) && | ||
739 | + (valueAsString->size() > 18) && | ||
740 | + valueAsString->compare("9223372036854775807") > 0) { | ||
741 | + if (!quiet) | ||
742 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is greater than datatype max 9223372036854775807.\n"; | ||
743 | + return false; | ||
744 | + } | ||
745 | + } break; | ||
746 | + case U8: { | ||
747 | + if (valueAsString->compare("0") < 0) { | ||
748 | + if (!quiet) | ||
749 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is less than datatype min 0.\n"; | ||
750 | + return false; | ||
751 | + } | ||
752 | + | ||
753 | + if (isdigit(valueAsString) && | ||
754 | + (valueAsString->size() > 19) && | ||
755 | + valueAsString->compare("18446744073709551615") > 0) { | ||
756 | + if (!quiet) | ||
757 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is greater than datatype max 18446744073709551615.\n"; | ||
758 | + return false; | ||
759 | + } | ||
760 | + } break; | ||
761 | + case F: { | ||
762 | + double dmax = static_cast<double>(std::numeric_limits<float>::max()); | ||
763 | + double dvalue = atof(valueAsString->c_str()); | ||
764 | + double dmin = -dmax; | ||
765 | + if (dvalue < dmin) { | ||
766 | + if (!quiet) { | ||
767 | + fprintf(stderr, "ERROR: Invalid value %g is less than datatype min %g.\n", dvalue, dmin); | ||
768 | + } | ||
769 | + return false; | ||
770 | + } | ||
771 | + | ||
772 | + if (dvalue > dmax) { | ||
773 | + if (!quiet) | ||
774 | + std::cerr << "ERROR: Invalid value " << dvalue << " is greater than datatype max " << dmax << ".\n"; | ||
775 | + return false; | ||
776 | + } | ||
777 | + } break; | ||
778 | + case D: { | ||
779 | + long double ldmax = static_cast<long double>(std::numeric_limits<double>::max()); | ||
780 | + std::stringstream ss(valueAsString->c_str()); | ||
781 | + long double ldvalue; | ||
782 | + ss >> ldvalue; | ||
783 | + long double ldmin = -ldmax; | ||
784 | + | ||
785 | + if (ldvalue < ldmin) { | ||
786 | + if (!quiet) | ||
787 | + std::cerr << "ERROR: Invalid value " << ldvalue << " is less than datatype min " << ldmin << ".\n"; | ||
788 | + return false; | ||
789 | + } | ||
790 | + | ||
791 | + if (ldvalue > ldmax) { | ||
792 | + if (!quiet) | ||
793 | + std::cerr << "ERROR: Invalid value " << ldvalue << " is greater than datatype max " << ldmax << ".\n"; | ||
794 | + return false; | ||
795 | + } | ||
796 | + } break; | ||
797 | + case NOTYPE: default: break; | ||
798 | + } | ||
799 | + } else { | ||
800 | + if (op == IN) { | ||
801 | + int i=0; | ||
802 | + if (insensitive) { | ||
803 | + std::string valueAsStringLower(*valueAsString); | ||
804 | + ToLowerASCII(valueAsStringLower); | ||
805 | + for(; i < size; ++i) { | ||
806 | + if (valueAsStringLower.compare(t[i]->c_str()) == 0) | ||
807 | + return true; | ||
808 | + } | ||
809 | + } else { | ||
810 | + for(; i < size; ++i) { | ||
811 | + if (valueAsString->compare(t[i]->c_str()) == 0) | ||
812 | + return true; | ||
813 | + } | ||
814 | + } | ||
815 | + return false; | ||
816 | + } | ||
817 | + } | ||
818 | + | ||
819 | + // Only check datatype limits, and return; | ||
820 | + if (op == NOOP) return true; | ||
821 | + | ||
822 | +#define VALIDATE(T, U, LIST) { \ | ||
823 | + /* Value string converted to true native type. */ \ | ||
824 | + std::stringstream ss(valueAsString->c_str());\ | ||
825 | + U v;\ | ||
826 | + ss >> v;\ | ||
827 | + /* Check if within list. */ \ | ||
828 | + if (op == IN) { \ | ||
829 | + T * last = LIST + size;\ | ||
830 | + return (last != std::find(LIST, last, v)); \ | ||
831 | + } \ | ||
832 | + \ | ||
833 | + /* Check if within user's custom range. */ \ | ||
834 | + T v0, v1; \ | ||
835 | + if (size > 0) { \ | ||
836 | + v0 = LIST[0]; \ | ||
837 | + } \ | ||
838 | + \ | ||
839 | + if (size > 1) { \ | ||
840 | + v1 = LIST[1]; \ | ||
841 | + } \ | ||
842 | + \ | ||
843 | + switch (op) {\ | ||
844 | + case LT:\ | ||
845 | + if (size > 0) {\ | ||
846 | + return v < v0;\ | ||
847 | + } else {\ | ||
848 | + std::cerr << "ERROR: No value given to validate if " << v << " < X.\n";\ | ||
849 | + return false;\ | ||
850 | + }\ | ||
851 | + break;\ | ||
852 | + case LE:\ | ||
853 | + if (size > 0) {\ | ||
854 | + return v <= v0;\ | ||
855 | + } else {\ | ||
856 | + std::cerr << "ERROR: No value given to validate if " << v << " <= X.\n";\ | ||
857 | + return false;\ | ||
858 | + }\ | ||
859 | + break;\ | ||
860 | + case GT:\ | ||
861 | + if (size > 0) {\ | ||
862 | + return v > v0;\ | ||
863 | + } else {\ | ||
864 | + std::cerr << "ERROR: No value given to validate if " << v << " > X.\n";\ | ||
865 | + return false;\ | ||
866 | + }\ | ||
867 | + break;\ | ||
868 | + case GE:\ | ||
869 | + if (size > 0) {\ | ||
870 | + return v >= v0;\ | ||
871 | + } else {\ | ||
872 | + std::cerr << "ERROR: No value given to validate if " << v << " >= X.\n";\ | ||
873 | + return false;\ | ||
874 | + }\ | ||
875 | + break;\ | ||
876 | + case GTLT:\ | ||
877 | + if (size > 1) {\ | ||
878 | + return (v0 < v) && (v < v1);\ | ||
879 | + } else {\ | ||
880 | + std::cerr << "ERROR: Missing values to validate if X1 < " << v << " < X2.\n";\ | ||
881 | + return false;\ | ||
882 | + }\ | ||
883 | + break;\ | ||
884 | + case GELT:\ | ||
885 | + if (size > 1) {\ | ||
886 | + return (v0 <= v) && (v < v1);\ | ||
887 | + } else {\ | ||
888 | + std::cerr << "ERROR: Missing values to validate if X1 <= " << v << " < X2.\n";\ | ||
889 | + return false;\ | ||
890 | + }\ | ||
891 | + break;\ | ||
892 | + case GELE:\ | ||
893 | + if (size > 1) {\ | ||
894 | + return (v0 <= v) && (v <= v1);\ | ||
895 | + } else {\ | ||
896 | + std::cerr << "ERROR: Missing values to validate if X1 <= " << v << " <= X2.\n";\ | ||
897 | + return false;\ | ||
898 | + }\ | ||
899 | + break;\ | ||
900 | + case GTLE:\ | ||
901 | + if (size > 1) {\ | ||
902 | + return (v0 < v) && (v <= v1);\ | ||
903 | + } else {\ | ||
904 | + std::cerr << "ERROR: Missing values to validate if X1 < " << v << " <= X2.\n";\ | ||
905 | + return false;\ | ||
906 | + }\ | ||
907 | + break;\ | ||
908 | + case NOOP: case IN: default: break;\ | ||
909 | + } \ | ||
910 | + } | ||
911 | + | ||
912 | + switch(type) { | ||
913 | + case U1: VALIDATE(unsigned char, int, u1); break; | ||
914 | + case S1: VALIDATE(char, int, s1); break; | ||
915 | + case U2: VALIDATE(unsigned short, int, u2); break; | ||
916 | + case S2: VALIDATE(short, int, s2); break; | ||
917 | + case U4: VALIDATE(unsigned int, unsigned int, u4); break; | ||
918 | + case S4: VALIDATE(int, int, s4); break; | ||
919 | + case U8: VALIDATE(unsigned long long, unsigned long long, u8); break; | ||
920 | + case S8: VALIDATE(long long, long long, s8); break; | ||
921 | + case F: VALIDATE(float, float, f); break; | ||
922 | + case D: VALIDATE(double, double, d); break; | ||
923 | + default: break; | ||
924 | + } | ||
925 | + | ||
926 | + return true; | ||
927 | +}; | ||
928 | +/* ################################################################### */ | ||
/**
 * One command-line option: its flags, its configuration, and every argument
 * list that was parsed for it.
 *
 * The strings pointed to by `flags` and `args` (and the inner vectors of
 * `args`) are owned by this object and are freed by the destructor and by
 * clearArgs(). No copy constructor or assignment operator is defined, so a
 * copied instance would free the same pointers a second time; handle groups
 * by pointer or reference only.
 */
class OptionGroup {
public:
  // Initializers are written in member declaration order, which is the order
  // in which they are executed.
  OptionGroup() : delim(0), expectArgs(0), isRequired(false), isSet(false) { }

  ~OptionGroup() {
    for (std::vector< std::string* >::size_type i = 0; i < flags.size(); ++i)
      delete flags[i];

    flags.clear();
    parseIndex.clear();
    clearArgs();
  }

  // Frees all parsed arguments and marks the option as not set.
  inline void clearArgs();

  // Scalar getters: write the first argument of the first flag instance into
  // the out-parameter, or the value parsed from `defaults` when not set.
  inline void getInt(int&);
  inline void getLong(long&);
  inline void getLongLong(long long&);
  inline void getULong(unsigned long&);
  inline void getULongLong(unsigned long long&);
  inline void getFloat(float&);
  inline void getDouble(double&);
  inline void getString(std::string&);

  // List getters: convert the argument list of the first flag instance, or
  // `defaults` split by `delim` when not set.
  inline void getInts(std::vector<int>&);
  inline void getLongs(std::vector<long>&);
  inline void getULongs(std::vector<unsigned long>&);
  inline void getFloats(std::vector<float>&);
  inline void getDoubles(std::vector<double>&);
  inline void getStrings(std::vector<std::string>&);

  // Multi getters: one inner vector per flag instance; when not set, the
  // split `defaults` go into element 0.
  inline void getMultiInts(std::vector< std::vector<int> >&);
  inline void getMultiLongs(std::vector< std::vector<long> >&);
  inline void getMultiULongs(std::vector< std::vector<unsigned long> >&);
  inline void getMultiFloats(std::vector< std::vector<float> >&);
  inline void getMultiDoubles(std::vector< std::vector<double> >&);
  inline void getMultiStrings(std::vector< std::vector<std::string> >&);

  // defaults value regardless of being set by user.
  std::string defaults;
  // If expects arguments, this will delimit arg list.
  char delim;
  // If not 0, then number of delimited args. -1 for arbitrary number.
  int expectArgs;
  // Descriptive help message shown in usage instructions for option.
  std::string help;
  // 0 or 1.
  bool isRequired;
  // A list of flags that denote this option, i.e. -d, --dimension.
  std::vector< std::string* > flags;
  // If was set (or found).
  bool isSet;
  // Lists of arguments, per flag instance, after splitting by delimiter.
  std::vector< std::vector< std::string* > * > args;
  // Index where each group was parsed from input stream to track order.
  std::vector<int> parseIndex;
};
984 | +/* ################################################################### */ | ||
985 | +void OptionGroup::clearArgs() { | ||
986 | + int i,j; | ||
987 | + for(i=0; i < args.size(); ++i) { | ||
988 | + for(j=0; j < args[i]->size(); ++j) | ||
989 | + delete args[i]->at(j); | ||
990 | + | ||
991 | + delete args[i]; | ||
992 | + } | ||
993 | + | ||
994 | + args.clear(); | ||
995 | + isSet = false; | ||
996 | +}; | ||
997 | +/* ################################################################### */ | ||
998 | +void OptionGroup::getInt(int & out) { | ||
999 | + if (!isSet) { | ||
1000 | + if (defaults.empty()) | ||
1001 | + out = 0; | ||
1002 | + else | ||
1003 | + out = atoi(defaults.c_str()); | ||
1004 | + } else { | ||
1005 | + if (args.empty() || args[0]->empty()) | ||
1006 | + out = 0; | ||
1007 | + else { | ||
1008 | + out = atoi(args[0]->at(0)->c_str()); | ||
1009 | + } | ||
1010 | + } | ||
1011 | +}; | ||
1012 | +/* ################################################################### */ | ||
1013 | +void OptionGroup::getLong(long & out) { | ||
1014 | + if (!isSet) { | ||
1015 | + if (defaults.empty()) | ||
1016 | + out = 0; | ||
1017 | + else | ||
1018 | + out = atoi(defaults.c_str()); | ||
1019 | + } else { | ||
1020 | + if (args.empty() || args[0]->empty()) | ||
1021 | + out = 0; | ||
1022 | + else { | ||
1023 | + out = atol(args[0]->at(0)->c_str()); | ||
1024 | + } | ||
1025 | + } | ||
1026 | +}; | ||
1027 | +/* ################################################################### */ | ||
1028 | +void OptionGroup::getLongLong(long long & out) { | ||
1029 | + if (!isSet) { | ||
1030 | + if (defaults.empty()) | ||
1031 | + out = 0; | ||
1032 | + else { | ||
1033 | + std::stringstream ss(defaults.c_str()); | ||
1034 | + ss >> out; | ||
1035 | + } | ||
1036 | + } else { | ||
1037 | + if (args.empty() || args[0]->empty()) | ||
1038 | + out = 0; | ||
1039 | + else { | ||
1040 | + std::stringstream ss(args[0]->at(0)->c_str()); | ||
1041 | + ss >> out; | ||
1042 | + } | ||
1043 | + } | ||
1044 | +}; | ||
1045 | +/* ################################################################### */ | ||
1046 | +void OptionGroup::getULong(unsigned long & out) { | ||
1047 | + if (!isSet) { | ||
1048 | + if (defaults.empty()) | ||
1049 | + out = 0; | ||
1050 | + else | ||
1051 | + out = atoi(defaults.c_str()); | ||
1052 | + } else { | ||
1053 | + if (args.empty() || args[0]->empty()) | ||
1054 | + out = 0; | ||
1055 | + else { | ||
1056 | + out = strtoul(args[0]->at(0)->c_str(),0,0); | ||
1057 | + } | ||
1058 | + } | ||
1059 | +}; | ||
1060 | +/* ################################################################### */ | ||
1061 | +void OptionGroup::getULongLong(unsigned long long & out) { | ||
1062 | + if (!isSet) { | ||
1063 | + if (defaults.empty()) | ||
1064 | + out = 0; | ||
1065 | + else { | ||
1066 | + std::stringstream ss(defaults.c_str()); | ||
1067 | + ss >> out; | ||
1068 | + } | ||
1069 | + } else { | ||
1070 | + if (args.empty() || args[0]->empty()) | ||
1071 | + out = 0; | ||
1072 | + else { | ||
1073 | + std::stringstream ss(args[0]->at(0)->c_str()); | ||
1074 | + ss >> out; | ||
1075 | + } | ||
1076 | + } | ||
1077 | +}; | ||
1078 | +/* ################################################################### */ | ||
1079 | +void OptionGroup::getFloat(float & out) { | ||
1080 | + if (!isSet) { | ||
1081 | + if (defaults.empty()) | ||
1082 | + out = 0.0; | ||
1083 | + else | ||
1084 | + out = (float)atof(defaults.c_str()); | ||
1085 | + } else { | ||
1086 | + if (args.empty() || args[0]->empty()) | ||
1087 | + out = 0.0; | ||
1088 | + else { | ||
1089 | + out = (float)atof(args[0]->at(0)->c_str()); | ||
1090 | + } | ||
1091 | + } | ||
1092 | +}; | ||
1093 | +/* ################################################################### */ | ||
1094 | +void OptionGroup::getDouble(double & out) { | ||
1095 | + if (!isSet) { | ||
1096 | + if (defaults.empty()) | ||
1097 | + out = 0.0; | ||
1098 | + else | ||
1099 | + out = atof(defaults.c_str()); | ||
1100 | + } else { | ||
1101 | + if (args.empty() || args[0]->empty()) | ||
1102 | + out = 0.0; | ||
1103 | + else { | ||
1104 | + out = atof(args[0]->at(0)->c_str()); | ||
1105 | + } | ||
1106 | + } | ||
1107 | +}; | ||
1108 | +/* ################################################################### */ | ||
1109 | +void OptionGroup::getString(std::string & out) { | ||
1110 | + if (!isSet) { | ||
1111 | + out = defaults; | ||
1112 | + } else { | ||
1113 | + if (args.empty() || args[0]->empty()) | ||
1114 | + out = ""; | ||
1115 | + else { | ||
1116 | + out = *args[0]->at(0); | ||
1117 | + } | ||
1118 | + } | ||
1119 | +}; | ||
1120 | +/* ################################################################### */ | ||
1121 | +void OptionGroup::getInts(std::vector<int> & out) { | ||
1122 | + if (!isSet) { | ||
1123 | + if (!defaults.empty()) { | ||
1124 | + std::vector< std::string > strings; | ||
1125 | + SplitDelim(defaults, delim, strings); | ||
1126 | + StringsToInts(strings, out); | ||
1127 | + } | ||
1128 | + } else { | ||
1129 | + if (!(args.empty() || args[0]->empty())) | ||
1130 | + StringsToInts(args[0], &out); | ||
1131 | + } | ||
1132 | +}; | ||
1133 | +/* ################################################################### */ | ||
1134 | +void OptionGroup::getLongs(std::vector<long> & out) { | ||
1135 | + if (!isSet) { | ||
1136 | + if (!defaults.empty()) { | ||
1137 | + std::vector< std::string > strings; | ||
1138 | + SplitDelim(defaults, delim, strings); | ||
1139 | + StringsToLongs(strings, out); | ||
1140 | + } | ||
1141 | + } else { | ||
1142 | + if (!(args.empty() || args[0]->empty())) | ||
1143 | + StringsToLongs(args[0], &out); | ||
1144 | + } | ||
1145 | +}; | ||
1146 | +/* ################################################################### */ | ||
1147 | +void OptionGroup::getULongs(std::vector<unsigned long> & out) { | ||
1148 | + if (!isSet) { | ||
1149 | + if (!defaults.empty()) { | ||
1150 | + std::vector< std::string > strings; | ||
1151 | + SplitDelim(defaults, delim, strings); | ||
1152 | + StringsToULongs(strings, out); | ||
1153 | + } | ||
1154 | + } else { | ||
1155 | + if (!(args.empty() || args[0]->empty())) | ||
1156 | + StringsToULongs(args[0], &out); | ||
1157 | + } | ||
1158 | +}; | ||
1159 | +/* ################################################################### */ | ||
1160 | +void OptionGroup::getFloats(std::vector<float> & out) { | ||
1161 | + if (!isSet) { | ||
1162 | + if (!defaults.empty()) { | ||
1163 | + std::vector< std::string > strings; | ||
1164 | + SplitDelim(defaults, delim, strings); | ||
1165 | + StringsToFloats(strings, out); | ||
1166 | + } | ||
1167 | + } else { | ||
1168 | + if (!(args.empty() || args[0]->empty())) | ||
1169 | + StringsToFloats(args[0], &out); | ||
1170 | + } | ||
1171 | +}; | ||
1172 | +/* ################################################################### */ | ||
1173 | +void OptionGroup::getDoubles(std::vector<double> & out) { | ||
1174 | + if (!isSet) { | ||
1175 | + if (!defaults.empty()) { | ||
1176 | + std::vector< std::string > strings; | ||
1177 | + SplitDelim(defaults, delim, strings); | ||
1178 | + StringsToDoubles(strings, out); | ||
1179 | + } | ||
1180 | + } else { | ||
1181 | + if (!(args.empty() || args[0]->empty())) | ||
1182 | + StringsToDoubles(args[0], &out); | ||
1183 | + } | ||
1184 | +}; | ||
1185 | +/* ################################################################### */ | ||
1186 | +void OptionGroup::getStrings(std::vector<std::string>& out) { | ||
1187 | + if (!isSet) { | ||
1188 | + if (!defaults.empty()) { | ||
1189 | + SplitDelim(defaults, delim, out); | ||
1190 | + } | ||
1191 | + } else { | ||
1192 | + if (!(args.empty() || args[0]->empty())) | ||
1193 | + StringsToStrings(args[0], &out); | ||
1194 | + } | ||
1195 | +}; | ||
1196 | +/* ################################################################### */ | ||
1197 | +void OptionGroup::getMultiInts(std::vector< std::vector<int> >& out) { | ||
1198 | + if (!isSet) { | ||
1199 | + if (!defaults.empty()) { | ||
1200 | + std::vector< std::string > strings; | ||
1201 | + SplitDelim(defaults, delim, strings); | ||
1202 | + if (out.size() < 1) out.resize(1); | ||
1203 | + StringsToInts(strings, out[0]); | ||
1204 | + } | ||
1205 | + } else { | ||
1206 | + if (!args.empty()) { | ||
1207 | + int n = args.size(); | ||
1208 | + if (out.size() < n) out.resize(n); | ||
1209 | + for(int i=0; i < n; ++i) { | ||
1210 | + StringsToInts(args[i], &out[i]); | ||
1211 | + } | ||
1212 | + } | ||
1213 | + } | ||
1214 | +}; | ||
1215 | +/* ################################################################### */ | ||
1216 | +void OptionGroup::getMultiLongs(std::vector< std::vector<long> >& out) { | ||
1217 | + if (!isSet) { | ||
1218 | + if (!defaults.empty()) { | ||
1219 | + std::vector< std::string > strings; | ||
1220 | + SplitDelim(defaults, delim, strings); | ||
1221 | + if (out.size() < 1) out.resize(1); | ||
1222 | + StringsToLongs(strings, out[0]); | ||
1223 | + } | ||
1224 | + } else { | ||
1225 | + if (!args.empty()) { | ||
1226 | + int n = args.size(); | ||
1227 | + if (out.size() < n) out.resize(n); | ||
1228 | + for(int i=0; i < n; ++i) { | ||
1229 | + StringsToLongs(args[i], &out[i]); | ||
1230 | + } | ||
1231 | + } | ||
1232 | + } | ||
1233 | +}; | ||
1234 | +/* ################################################################### */ | ||
1235 | +void OptionGroup::getMultiULongs(std::vector< std::vector<unsigned long> >& out) { | ||
1236 | + if (!isSet) { | ||
1237 | + if (!defaults.empty()) { | ||
1238 | + std::vector< std::string > strings; | ||
1239 | + SplitDelim(defaults, delim, strings); | ||
1240 | + if (out.size() < 1) out.resize(1); | ||
1241 | + StringsToULongs(strings, out[0]); | ||
1242 | + } | ||
1243 | + } else { | ||
1244 | + if (!args.empty()) { | ||
1245 | + int n = args.size(); | ||
1246 | + if (out.size() < n) out.resize(n); | ||
1247 | + for(int i=0; i < n; ++i) { | ||
1248 | + StringsToULongs(args[i], &out[i]); | ||
1249 | + } | ||
1250 | + } | ||
1251 | + } | ||
1252 | +}; | ||
1253 | +/* ################################################################### */ | ||
1254 | +void OptionGroup::getMultiFloats(std::vector< std::vector<float> >& out) { | ||
1255 | + if (!isSet) { | ||
1256 | + if (!defaults.empty()) { | ||
1257 | + std::vector< std::string > strings; | ||
1258 | + SplitDelim(defaults, delim, strings); | ||
1259 | + if (out.size() < 1) out.resize(1); | ||
1260 | + StringsToFloats(strings, out[0]); | ||
1261 | + } | ||
1262 | + } else { | ||
1263 | + if (!args.empty()) { | ||
1264 | + int n = args.size(); | ||
1265 | + if (out.size() < n) out.resize(n); | ||
1266 | + for(int i=0; i < n; ++i) { | ||
1267 | + StringsToFloats(args[i], &out[i]); | ||
1268 | + } | ||
1269 | + } | ||
1270 | + } | ||
1271 | +}; | ||
1272 | +/* ################################################################### */ | ||
1273 | +void OptionGroup::getMultiDoubles(std::vector< std::vector<double> >& out) { | ||
1274 | + if (!isSet) { | ||
1275 | + if (!defaults.empty()) { | ||
1276 | + std::vector< std::string > strings; | ||
1277 | + SplitDelim(defaults, delim, strings); | ||
1278 | + if (out.size() < 1) out.resize(1); | ||
1279 | + StringsToDoubles(strings, out[0]); | ||
1280 | + } | ||
1281 | + } else { | ||
1282 | + if (!args.empty()) { | ||
1283 | + int n = args.size(); | ||
1284 | + if (out.size() < n) out.resize(n); | ||
1285 | + for(int i=0; i < n; ++i) { | ||
1286 | + StringsToDoubles(args[i], &out[i]); | ||
1287 | + } | ||
1288 | + } | ||
1289 | + } | ||
1290 | +}; | ||
1291 | +/* ################################################################### */ | ||
1292 | +void OptionGroup::getMultiStrings(std::vector< std::vector<std::string> >& out) { | ||
1293 | + if (!isSet) { | ||
1294 | + if (!defaults.empty()) { | ||
1295 | + std::vector< std::string > strings; | ||
1296 | + SplitDelim(defaults, delim, strings); | ||
1297 | + if (out.size() < 1) out.resize(1); | ||
1298 | + out[0] = strings; | ||
1299 | + } | ||
1300 | + } else { | ||
1301 | + if (!args.empty()) { | ||
1302 | + int n = args.size(); | ||
1303 | + if (out.size() < n) out.resize(n); | ||
1304 | + | ||
1305 | + for(int i=0; i < n; ++i) { | ||
1306 | + for(int j=0; j < args[i]->size(); ++j) | ||
1307 | + out[i].push_back( *args[i]->at(j) ); | ||
1308 | + } | ||
1309 | + } | ||
1310 | + } | ||
1311 | +}; | ||
1312 | +/* ################################################################### */ | ||
1313 | +typedef std::map< int, ezOptionValidator* > ValidatorMap; | ||
1314 | + | ||
1315 | +class ezOptionParser { | ||
1316 | +public: | ||
1317 | + // How to layout usage descriptions with the option flags. | ||
1318 | + enum Layout { ALIGN, INTERLEAVE, STAGGER }; | ||
1319 | + | ||
1320 | + inline ~ezOptionParser(); | ||
1321 | + | ||
1322 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, ezOptionValidator* validator=0); | ||
1323 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, ezOptionValidator* validator=0); | ||
1324 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, ezOptionValidator* validator=0); | ||
1325 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, const char * flag4, ezOptionValidator* validator=0); | ||
1326 | + inline bool exportFile(const char * filename, bool all=false); | ||
1327 | + inline OptionGroup * get(const char * name); | ||
1328 | + inline void getUsage(std::string & usage, int width=80, Layout layout=ALIGN); | ||
1329 | + inline void getUsageDescriptions(std::string & usage, int width=80, Layout layout=STAGGER); | ||
1330 | + inline bool gotExpected(std::vector<std::string> & badOptions); | ||
1331 | + inline bool gotRequired(std::vector<std::string> & badOptions); | ||
1332 | + inline bool gotValid(std::vector<std::string> & badOptions, std::vector<std::string> & badArgs); | ||
1333 | + inline bool importFile(const char * filename, char comment='#'); | ||
1334 | + inline int isSet(const char * name); | ||
1335 | + inline int isSet(std::string & name); | ||
1336 | + inline void parse(int argc, const char * argv[]); | ||
1337 | + inline void prettyPrint(std::string & out); | ||
1338 | + inline void reset(); | ||
1339 | + inline void resetArgs(); | ||
1340 | + | ||
1341 | + // Insert extra empty line betwee each option's usage description. | ||
1342 | + char doublespace; | ||
1343 | + // General description in human language on what the user's tool does. | ||
1344 | + // It's the first section to get printed in the full usage message. | ||
1345 | + std::string overview; | ||
1346 | + // A synopsis of command and options usage to show expected order of input arguments. | ||
1347 | + // It's the second section to get printed in the full usage message. | ||
1348 | + std::string syntax; | ||
1349 | + // Example (third) section in usage message. | ||
1350 | + std::string example; | ||
1351 | + // Final section printed in usage message. For contact, copyrights, version info. | ||
1352 | + std::string footer; | ||
1353 | + // Map from an option to an Id of its parent group. | ||
1354 | + std::map< std::string, int > optionGroupIds; | ||
1355 | + // Unordered collection of the option groups. | ||
1356 | + std::vector< OptionGroup* > groups; | ||
1357 | + // Store unexpected args in input. | ||
1358 | + std::vector< std::string* > unknownArgs; | ||
1359 | + // List of args that occur left-most before first option flag. | ||
1360 | + std::vector< std::string* > firstArgs; | ||
1361 | + // List of args that occur after last right-most option flag and its args. | ||
1362 | + std::vector< std::string* > lastArgs; | ||
1363 | + // List of validators. | ||
1364 | + ValidatorMap validators; | ||
1365 | + // Maps group id to a validator index into vector of validators. Validator index is -1 if there is no validator for group. | ||
1366 | + std::map< int, int > groupValidators; | ||
1367 | +}; | ||
1368 | +/* ################################################################### */ | ||
1369 | +ezOptionParser::~ezOptionParser() { | ||
1370 | + reset(); | ||
1371 | +} | ||
1372 | +/* ################################################################### */ | ||
1373 | +void ezOptionParser::reset() { | ||
1374 | + this->doublespace = 1; | ||
1375 | + | ||
1376 | + int i; | ||
1377 | + for(i=0; i < groups.size(); ++i) | ||
1378 | + delete groups[i]; | ||
1379 | + groups.clear(); | ||
1380 | + | ||
1381 | + for(i=0; i < unknownArgs.size(); ++i) | ||
1382 | + delete unknownArgs[i]; | ||
1383 | + unknownArgs.clear(); | ||
1384 | + | ||
1385 | + for(i=0; i < firstArgs.size(); ++i) | ||
1386 | + delete firstArgs[i]; | ||
1387 | + firstArgs.clear(); | ||
1388 | + | ||
1389 | + for(i=0; i < lastArgs.size(); ++i) | ||
1390 | + delete lastArgs[i]; | ||
1391 | + lastArgs.clear(); | ||
1392 | + | ||
1393 | + ValidatorMap::iterator it; | ||
1394 | + for(it = validators.begin(); it != validators.end(); ++it) | ||
1395 | + delete it->second; | ||
1396 | + | ||
1397 | + validators.clear(); | ||
1398 | + optionGroupIds.clear(); | ||
1399 | + groupValidators.clear(); | ||
1400 | +}; | ||
1401 | +/* ################################################################### */ | ||
1402 | +void ezOptionParser::resetArgs() { | ||
1403 | + int i; | ||
1404 | + for(i=0; i < groups.size(); ++i) | ||
1405 | + groups[i]->clearArgs(); | ||
1406 | + | ||
1407 | + for(i=0; i < unknownArgs.size(); ++i) | ||
1408 | + delete unknownArgs[i]; | ||
1409 | + unknownArgs.clear(); | ||
1410 | + | ||
1411 | + for(i=0; i < firstArgs.size(); ++i) | ||
1412 | + delete firstArgs[i]; | ||
1413 | + firstArgs.clear(); | ||
1414 | + | ||
1415 | + for(i=0; i < lastArgs.size(); ++i) | ||
1416 | + delete lastArgs[i]; | ||
1417 | + lastArgs.clear(); | ||
1418 | +}; | ||
1419 | +/* ################################################################### */ | ||
1420 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, ezOptionValidator* validator) { | ||
1421 | + int id = this->groups.size(); | ||
1422 | + OptionGroup * g = new OptionGroup; | ||
1423 | + g->defaults = defaults; | ||
1424 | + g->isRequired = required; | ||
1425 | + g->expectArgs = expectArgs; | ||
1426 | + g->delim = delim; | ||
1427 | + g->isSet = 0; | ||
1428 | + g->help = help; | ||
1429 | + std::string *f1 = new std::string(flag1); | ||
1430 | + g->flags.push_back( f1 ); | ||
1431 | + this->optionGroupIds[flag1] = id; | ||
1432 | + this->groups.push_back(g); | ||
1433 | + | ||
1434 | + if (validator) { | ||
1435 | + int vid = validator->id; | ||
1436 | + validators[vid] = validator; | ||
1437 | + groupValidators[id] = vid; | ||
1438 | + } else { | ||
1439 | + groupValidators[id] = -1; | ||
1440 | + } | ||
1441 | +}; | ||
1442 | +/* ################################################################### */ | ||
1443 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, ezOptionValidator* validator) { | ||
1444 | + int id = this->groups.size(); | ||
1445 | + OptionGroup * g = new OptionGroup; | ||
1446 | + g->defaults = defaults; | ||
1447 | + g->isRequired = required; | ||
1448 | + g->expectArgs = expectArgs; | ||
1449 | + g->delim = delim; | ||
1450 | + g->isSet = 0; | ||
1451 | + g->help = help; | ||
1452 | + std::string *f1 = new std::string(flag1); | ||
1453 | + g->flags.push_back( f1 ); | ||
1454 | + std::string *f2 = new std::string(flag2); | ||
1455 | + g->flags.push_back( f2 ); | ||
1456 | + this->optionGroupIds[flag1] = id; | ||
1457 | + this->optionGroupIds[flag2] = id; | ||
1458 | + | ||
1459 | + this->groups.push_back(g); | ||
1460 | + | ||
1461 | + if (validator) { | ||
1462 | + int vid = validator->id; | ||
1463 | + validators[vid] = validator; | ||
1464 | + groupValidators[id] = vid; | ||
1465 | + } else { | ||
1466 | + groupValidators[id] = -1; | ||
1467 | + } | ||
1468 | +}; | ||
1469 | +/* ################################################################### */ | ||
1470 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, ezOptionValidator* validator) { | ||
1471 | + int id = this->groups.size(); | ||
1472 | + OptionGroup * g = new OptionGroup; | ||
1473 | + g->defaults = defaults; | ||
1474 | + g->isRequired = required; | ||
1475 | + g->expectArgs = expectArgs; | ||
1476 | + g->delim = delim; | ||
1477 | + g->isSet = 0; | ||
1478 | + g->help = help; | ||
1479 | + std::string *f1 = new std::string(flag1); | ||
1480 | + g->flags.push_back( f1 ); | ||
1481 | + std::string *f2 = new std::string(flag2); | ||
1482 | + g->flags.push_back( f2 ); | ||
1483 | + std::string *f3 = new std::string(flag3); | ||
1484 | + g->flags.push_back( f3 ); | ||
1485 | + this->optionGroupIds[flag1] = id; | ||
1486 | + this->optionGroupIds[flag2] = id; | ||
1487 | + this->optionGroupIds[flag3] = id; | ||
1488 | + | ||
1489 | + this->groups.push_back(g); | ||
1490 | + | ||
1491 | + if (validator) { | ||
1492 | + int vid = validator->id; | ||
1493 | + validators[vid] = validator; | ||
1494 | + groupValidators[id] = vid; | ||
1495 | + } else { | ||
1496 | + groupValidators[id] = -1; | ||
1497 | + } | ||
1498 | +}; | ||
1499 | +/* ################################################################### */ | ||
1500 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, const char * flag4, ezOptionValidator* validator) { | ||
1501 | + int id = this->groups.size(); | ||
1502 | + OptionGroup * g = new OptionGroup; | ||
1503 | + g->defaults = defaults; | ||
1504 | + g->isRequired = required; | ||
1505 | + g->expectArgs = expectArgs; | ||
1506 | + g->delim = delim; | ||
1507 | + g->isSet = 0; | ||
1508 | + g->help = help; | ||
1509 | + std::string *f1 = new std::string(flag1); | ||
1510 | + g->flags.push_back( f1 ); | ||
1511 | + std::string *f2 = new std::string(flag2); | ||
1512 | + g->flags.push_back( f2 ); | ||
1513 | + std::string *f3 = new std::string(flag3); | ||
1514 | + g->flags.push_back( f3 ); | ||
1515 | + std::string *f4 = new std::string(flag4); | ||
1516 | + g->flags.push_back( f4 ); | ||
1517 | + this->optionGroupIds[flag1] = id; | ||
1518 | + this->optionGroupIds[flag2] = id; | ||
1519 | + this->optionGroupIds[flag3] = id; | ||
1520 | + this->optionGroupIds[flag4] = id; | ||
1521 | + | ||
1522 | + this->groups.push_back(g); | ||
1523 | + | ||
1524 | + if (validator) { | ||
1525 | + int vid = validator->id; | ||
1526 | + validators[vid] = validator; | ||
1527 | + groupValidators[id] = vid; | ||
1528 | + } else { | ||
1529 | + groupValidators[id] = -1; | ||
1530 | + } | ||
1531 | +}; | ||
1532 | +/* ################################################################### */ | ||
1533 | +bool ezOptionParser::exportFile(const char * filename, bool all) { | ||
1534 | + int i; | ||
1535 | + std::string out; | ||
1536 | + bool quote; | ||
1537 | + | ||
1538 | + // Export the first args, except the program name, so start from 1. | ||
1539 | + for(i=1; i < firstArgs.size(); ++i) { | ||
1540 | + quote = ((firstArgs[i]->find_first_of(" \t") != std::string::npos) && (firstArgs[i]->find_first_of("\'\"") == std::string::npos)); | ||
1541 | + | ||
1542 | + if (quote) | ||
1543 | + out.append("\""); | ||
1544 | + | ||
1545 | + out.append(*firstArgs[i]); | ||
1546 | + if (quote) | ||
1547 | + out.append("\""); | ||
1548 | + | ||
1549 | + out.append(" "); | ||
1550 | + } | ||
1551 | + | ||
1552 | + if (firstArgs.size() > 1) | ||
1553 | + out.append("\n"); | ||
1554 | + | ||
1555 | + std::vector<std::string* > stringPtrs(groups.size()); | ||
1556 | + int j,m; | ||
1557 | + int n = groups.size(); | ||
1558 | + for(i=0; i < n; ++i) { | ||
1559 | + stringPtrs[i] = groups[i]->flags[0]; | ||
1560 | + } | ||
1561 | + | ||
1562 | + OptionGroup *g; | ||
1563 | + // Sort first flag of each group with other groups. | ||
1564 | + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr); | ||
1565 | + for(i=0; i < n; ++i) { | ||
1566 | + g = get(stringPtrs[i]->c_str()); | ||
1567 | + if (g->isSet || all) { | ||
1568 | + if (!g->isSet || g->args.empty()) { | ||
1569 | + if (!g->defaults.empty()) { | ||
1570 | + out.append(*stringPtrs[i]); | ||
1571 | + out.append(" "); | ||
1572 | + quote = ((g->defaults.find_first_of(" \t") != std::string::npos) && (g->defaults.find_first_of("\'\"") == std::string::npos)); | ||
1573 | + if (quote) | ||
1574 | + out.append("\""); | ||
1575 | + | ||
1576 | + out.append(g->defaults); | ||
1577 | + if (quote) | ||
1578 | + out.append("\""); | ||
1579 | + | ||
1580 | + out.append("\n"); | ||
1581 | + } | ||
1582 | + } else { | ||
1583 | + int n = g->args.size(); | ||
1584 | + for(int j=0; j < n; ++j) { | ||
1585 | + out.append(*stringPtrs[i]); | ||
1586 | + out.append(" "); | ||
1587 | + m = g->args[j]->size(); | ||
1588 | + | ||
1589 | + for(int k=0; k < m; ++k) { | ||
1590 | + quote = ( (*g->args[j]->at(k)).find_first_of(" \t") != std::string::npos ); | ||
1591 | + if (quote) | ||
1592 | + out.append("\""); | ||
1593 | + | ||
1594 | + out.append(*g->args[j]->at(k)); | ||
1595 | + if (quote) | ||
1596 | + out.append("\""); | ||
1597 | + | ||
1598 | + if ((g->delim) && ((k+1) != m)) | ||
1599 | + out.append(1,g->delim); | ||
1600 | + } | ||
1601 | + out.append("\n"); | ||
1602 | + } | ||
1603 | + } | ||
1604 | + } | ||
1605 | + } | ||
1606 | + | ||
1607 | + // Export the last args. | ||
1608 | + for(i=0; i < lastArgs.size(); ++i) { | ||
1609 | + quote = ( lastArgs[i]->find_first_of(" \t") != std::string::npos ); | ||
1610 | + if (quote) | ||
1611 | + out.append("\""); | ||
1612 | + | ||
1613 | + out.append(*lastArgs[i]); | ||
1614 | + if (quote) | ||
1615 | + out.append("\""); | ||
1616 | + | ||
1617 | + out.append(" "); | ||
1618 | + } | ||
1619 | + | ||
1620 | + std::ofstream file(filename); | ||
1621 | + if (!file.is_open()) | ||
1622 | + return false; | ||
1623 | + | ||
1624 | + file << out; | ||
1625 | + file.close(); | ||
1626 | + | ||
1627 | + return true; | ||
1628 | +}; | ||
1629 | +/* ################################################################### */ | ||
1630 | +// Does not overwrite current options. | ||
1631 | +// Returns true if file was read successfully. | ||
1632 | +// So if this is used before parsing CLI, then option values will reflect | ||
1633 | +// this file, but if used after parsing CLI, then values will contain | ||
1634 | +// both CLI values and file's values. | ||
1635 | +// | ||
1636 | +// Comment lines are allowed if prefixed with #. | ||
1637 | +// Strings should be quoted as usual. | ||
1638 | +bool ezOptionParser::importFile(const char * filename, char comment) { | ||
1639 | + std::ifstream file (filename, std::ios::in | std::ios::ate); | ||
1640 | + if (!file.is_open()) | ||
1641 | + return false; | ||
1642 | + | ||
1643 | + // Read entire file contents. | ||
1644 | + std::ifstream::pos_type size = file.tellg(); | ||
1645 | + char * memblock = new char[(int)size+1]; // Add one for end of string. | ||
1646 | + file.seekg (0, std::ios::beg); | ||
1647 | + file.read (memblock, size); | ||
1648 | + memblock[size] = '\0'; | ||
1649 | + file.close(); | ||
1650 | + | ||
1651 | + // Find comment lines. | ||
1652 | + std::list<std::string*> lines; | ||
1653 | + std::string memblockstring(memblock); | ||
1654 | + delete[] memblock; | ||
1655 | + SplitDelim(memblockstring, '\n', lines); | ||
1656 | + int i,j,n; | ||
1657 | + std::list<std::string*>::iterator iter; | ||
1658 | + std::vector<int> sq, dq; // Single and double quote indices. | ||
1659 | + std::vector<int>::iterator lo; // For searching quote indices. | ||
1660 | + size_t pos; | ||
1661 | + const char *str; | ||
1662 | + std::string *line; | ||
1663 | + // Find all single and double quotes to correctly handle comment tokens. | ||
1664 | + for(iter=lines.begin(); iter != lines.end(); ++iter) { | ||
1665 | + line = *iter; | ||
1666 | + str = line->c_str(); | ||
1667 | + n = line->size(); | ||
1668 | + sq.clear(); | ||
1669 | + dq.clear(); | ||
1670 | + if (n) { | ||
1671 | + // If first char is comment, then erase line and continue. | ||
1672 | + pos = line->find_first_not_of(" \t\r"); | ||
1673 | + if ((pos==std::string::npos) || (line->at(pos)==comment)) { | ||
1674 | + line->erase(); | ||
1675 | + continue; | ||
1676 | + } else { | ||
1677 | + // Erase whitespace prefix. | ||
1678 | + line->erase(0,pos); | ||
1679 | + n = line->size(); | ||
1680 | + } | ||
1681 | + | ||
1682 | + if (line->at(0)=='"') | ||
1683 | + dq.push_back(0); | ||
1684 | + | ||
1685 | + if (line->at(0)=='\'') | ||
1686 | + sq.push_back(0); | ||
1687 | + } else { // Empty line. | ||
1688 | + continue; | ||
1689 | + } | ||
1690 | + | ||
1691 | + for(i=1; i < n; ++i) { | ||
1692 | + if ( (str[i]=='"') && (str[i-1]!='\\') ) | ||
1693 | + dq.push_back(i); | ||
1694 | + else if ( (str[i]=='\'') && (str[i-1]!='\\') ) | ||
1695 | + sq.push_back(i); | ||
1696 | + } | ||
1697 | + // Scan for comments, and when found, check bounds of quotes. | ||
1698 | + // Start with second char because already checked first char. | ||
1699 | + for(i=1; i < n; ++i) { | ||
1700 | + if ( (line->at(i)==comment) && (line->at(i-1)!='\\') ) { | ||
1701 | + // If within open/close quote pair, then not real comment. | ||
1702 | + if (sq.size()) { | ||
1703 | + lo = std::lower_bound(sq.begin(), sq.end(), i); | ||
1704 | + // All start of strings will be even indices, closing quotes is odd indices. | ||
1705 | + j = (int)(lo-sq.begin()); | ||
1706 | + if ( (j % 2) == 0) { // Even implies comment char not in quote pair. | ||
1707 | + // Erase from comment char to end of line. | ||
1708 | + line->erase(i); | ||
1709 | + break; | ||
1710 | + } | ||
1711 | + } else if (dq.size()) { | ||
1712 | + // Repeat tests for double quotes. | ||
1713 | + lo = std::lower_bound(dq.begin(), dq.end(), i); | ||
1714 | + j = (int)(lo-dq.begin()); | ||
1715 | + if ( (j % 2) == 0) { | ||
1716 | + line->erase(i); | ||
1717 | + break; | ||
1718 | + } | ||
1719 | + } else { | ||
1720 | + // Not in quotes. | ||
1721 | + line->erase(i); | ||
1722 | + break; | ||
1723 | + } | ||
1724 | + } | ||
1725 | + } | ||
1726 | + } | ||
1727 | + | ||
1728 | + std::string cmd; | ||
1729 | + // Convert list to string without newlines to simulate commandline. | ||
1730 | + for(iter=lines.begin(); iter != lines.end(); ++iter) { | ||
1731 | + if (! (*iter)->empty()) { | ||
1732 | + cmd.append(**iter); | ||
1733 | + cmd.append(" "); | ||
1734 | + } | ||
1735 | + } | ||
1736 | + | ||
1737 | + // Now parse as if from command line. | ||
1738 | + int argc=0; | ||
1739 | + char** argv = CommandLineToArgvA((char*)cmd.c_str(), &argc); | ||
1740 | + | ||
1741 | + // Parse. | ||
1742 | + parse(argc, (const char**)argv); | ||
1743 | + if (argv) free(argv); | ||
1744 | + for(iter=lines.begin(); iter != lines.end(); ++iter) | ||
1745 | + delete *iter; | ||
1746 | + | ||
1747 | + return true; | ||
1748 | +}; | ||
1749 | +/* ################################################################### */ | ||
1750 | +int ezOptionParser::isSet(const char * name) { | ||
1751 | + std::string sname(name); | ||
1752 | + | ||
1753 | + if (this->optionGroupIds.count(sname)) { | ||
1754 | + return this->groups[this->optionGroupIds[sname]]->isSet; | ||
1755 | + } | ||
1756 | + | ||
1757 | + return 0; | ||
1758 | +}; | ||
1759 | +/* ################################################################### */ | ||
1760 | +int ezOptionParser::isSet(std::string & name) { | ||
1761 | + if (this->optionGroupIds.count(name)) { | ||
1762 | + return this->groups[this->optionGroupIds[name]]->isSet; | ||
1763 | + } | ||
1764 | + | ||
1765 | + return 0; | ||
1766 | +}; | ||
1767 | +/* ################################################################### */ | ||
1768 | +OptionGroup * ezOptionParser::get(const char * name) { | ||
1769 | + if (optionGroupIds.count(name)) { | ||
1770 | + return groups[optionGroupIds[name]]; | ||
1771 | + } | ||
1772 | + | ||
1773 | + return 0; | ||
1774 | +}; | ||
1775 | +/* ################################################################### */ | ||
1776 | +void ezOptionParser::getUsage(std::string & usage, int width, Layout layout) { | ||
1777 | + | ||
1778 | + usage.append(overview); | ||
1779 | + usage.append("\n\n"); | ||
1780 | + usage.append("USAGE: "); | ||
1781 | + usage.append(syntax); | ||
1782 | + usage.append("\n\nOPTIONS:\n\n"); | ||
1783 | + getUsageDescriptions(usage, width, layout); | ||
1784 | + | ||
1785 | + if (!example.empty()) { | ||
1786 | + usage.append("EXAMPLES:\n\n"); | ||
1787 | + usage.append(example); | ||
1788 | + } | ||
1789 | + | ||
1790 | + if (!footer.empty()) { | ||
1791 | + usage.append(footer); | ||
1792 | + } | ||
1793 | +}; | ||
1794 | +/* ################################################################### */ | ||
1795 | +// Creates 2 column formatted help descriptions for each option flag. | ||
1796 | +void ezOptionParser::getUsageDescriptions(std::string & usage, int width, Layout layout) { | ||
1797 | + // Sort each flag list amongst each group. | ||
1798 | + int i; | ||
1799 | + // Store index of flag groups before sort for easy lookup later. | ||
1800 | + std::map<std::string*, int> stringPtrToIndexMap; | ||
1801 | + std::vector<std::string* > stringPtrs(groups.size()); | ||
1802 | + | ||
1803 | + for(i=0; i < groups.size(); ++i) { | ||
1804 | + std::sort(groups[i]->flags.begin(), groups[i]->flags.end(), CmpOptStringPtr); | ||
1805 | + stringPtrToIndexMap[groups[i]->flags[0]] = i; | ||
1806 | + stringPtrs[i] = groups[i]->flags[0]; | ||
1807 | + } | ||
1808 | + | ||
1809 | + size_t j, k, n; | ||
1810 | + std::string opts; | ||
1811 | + std::vector<std::string> sortedOpts; | ||
1812 | + // Sort first flag of each group with other groups. | ||
1813 | + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr); | ||
1814 | + for(i=0; i < groups.size(); ++i) { | ||
1815 | + //printf("DEBUG:%d: %d %d %s\n", __LINE__, i, stringPtrToIndexMap[stringPtrs[i]], stringPtrs[i]->c_str()); | ||
1816 | + k = stringPtrToIndexMap[stringPtrs[i]]; | ||
1817 | + opts.clear(); | ||
1818 | + for(j=0; j < groups[k]->flags.size()-1; ++j) { | ||
1819 | + opts.append(*groups[k]->flags[j]); | ||
1820 | + opts.append(", "); | ||
1821 | + | ||
1822 | + if (opts.size() > width) | ||
1823 | + opts.append("\n"); | ||
1824 | + } | ||
1825 | + // The last flag. No need to append comma anymore. | ||
1826 | + opts.append( *groups[k]->flags[j] ); | ||
1827 | + | ||
1828 | + if (groups[k]->expectArgs) { | ||
1829 | + opts.append(" ARG"); | ||
1830 | + | ||
1831 | + if (groups[k]->delim) { | ||
1832 | + opts.append("1["); | ||
1833 | + opts.append(1, groups[k]->delim); | ||
1834 | + opts.append("ARGn]"); | ||
1835 | + } | ||
1836 | + } | ||
1837 | + | ||
1838 | + sortedOpts.push_back(opts); | ||
1839 | + } | ||
1840 | + | ||
1841 | + // Each option group will use this to build multiline help description. | ||
1842 | + std::list<std::string*> desc; | ||
1843 | + // Number of whitespaces from start of line to description (interleave layout) or | ||
1844 | + // gap between flag names and description (align, stagger layouts). | ||
1845 | + int gutter = 3; | ||
1846 | + | ||
1847 | + // Find longest opt flag string to set column start for help usage descriptions. | ||
1848 | + int maxlen=0; | ||
1849 | + if (layout == ALIGN) { | ||
1850 | + for(i=0; i < groups.size(); ++i) { | ||
1851 | + if (maxlen < sortedOpts[i].size()) | ||
1852 | + maxlen = sortedOpts[i].size(); | ||
1853 | + } | ||
1854 | + } | ||
1855 | + | ||
1856 | + // The amount of space remaining on a line for help text after flags. | ||
1857 | + int helpwidth; | ||
1858 | + std::list<std::string*>::iterator cIter, insertionIter; | ||
1859 | + size_t pos; | ||
1860 | + for(i=0; i < groups.size(); ++i) { | ||
1861 | + k = stringPtrToIndexMap[stringPtrs[i]]; | ||
1862 | + | ||
1863 | + if (layout == STAGGER) | ||
1864 | + maxlen = sortedOpts[i].size(); | ||
1865 | + | ||
1866 | + int pad = gutter + maxlen; | ||
1867 | + helpwidth = width - pad; | ||
1868 | + | ||
1869 | + // All the following split-fu could be optimized by just using substring (offset, length) tuples, but just to get it done, we'll do some not-too expensive string copying. | ||
1870 | + SplitDelim(groups[k]->help, '\n', desc); | ||
1871 | + // Split lines longer than allowable help width. | ||
1872 | + for(insertionIter=desc.begin(), cIter=insertionIter++; | ||
1873 | + cIter != desc.end(); | ||
1874 | + cIter=insertionIter++) { | ||
1875 | + if ((*cIter)->size() > helpwidth) { | ||
1876 | + // Get pointer to next string to insert new strings before it. | ||
1877 | + std::string *rem = *cIter; | ||
1878 | + // Remove this line and add back in pieces. | ||
1879 | + desc.erase(cIter); | ||
1880 | + // Loop until remaining string is short enough. | ||
1881 | + while (rem->size() > helpwidth) { | ||
1882 | + // Find whitespace to split before helpwidth. | ||
1883 | + if (rem->at(helpwidth) == ' ') { | ||
1884 | + // If word ends exactly at helpwidth, then split after it. | ||
1885 | + pos = helpwidth; | ||
1886 | + } else { | ||
1887 | + // Otherwise, split occurs midword, so find whitespace before this word. | ||
1888 | + pos = rem->rfind(" ", helpwidth); | ||
1889 | + } | ||
1890 | + // Insert split string. | ||
1891 | + desc.insert(insertionIter, new std::string(*rem, 0, pos)); | ||
1892 | + // Now skip any whitespace to start new line. | ||
1893 | + pos = rem->find_first_not_of(' ', pos); | ||
1894 | + rem->erase(0, pos); | ||
1895 | + } | ||
1896 | + | ||
1897 | + if (rem->size()) | ||
1898 | + desc.insert(insertionIter, rem); | ||
1899 | + else | ||
1900 | + delete rem; | ||
1901 | + } | ||
1902 | + } | ||
1903 | + | ||
1904 | + usage.append(sortedOpts[i]); | ||
1905 | + if (layout != INTERLEAVE) | ||
1906 | + // Add whitespace between option names and description. | ||
1907 | + usage.append(pad - sortedOpts[i].size(), ' '); | ||
1908 | + else { | ||
1909 | + usage.append("\n"); | ||
1910 | + usage.append(gutter, ' '); | ||
1911 | + } | ||
1912 | + | ||
1913 | + // First line already padded above (before calling SplitDelim) after option flag names. | ||
1914 | + cIter = desc.begin(); | ||
1915 | + usage.append(**cIter); | ||
1916 | + usage.append("\n"); | ||
1917 | + // Now inject the pad for each line. | ||
1918 | + for(++cIter; cIter != desc.end(); ++cIter) { | ||
1919 | + usage.append(pad, ' '); | ||
1920 | + usage.append(**cIter); | ||
1921 | + usage.append("\n"); | ||
1922 | + } | ||
1923 | + | ||
1924 | + if (this->doublespace) usage.append("\n"); | ||
1925 | + | ||
1926 | + if (desc.size()) { | ||
1927 | + for(cIter=desc.begin(); cIter != desc.end(); ++cIter) | ||
1928 | + delete *cIter; | ||
1929 | + | ||
1930 | + desc.clear(); | ||
1931 | + } | ||
1932 | + } | ||
1933 | +}; | ||
1934 | +/* ################################################################### */ | ||
1935 | +bool ezOptionParser::gotExpected(std::vector<std::string> & badOptions) { | ||
1936 | + int i,j; | ||
1937 | + | ||
1938 | + for(i=0; i < groups.size(); ++i) { | ||
1939 | + OptionGroup *g = groups[i]; | ||
1940 | + // If was set, ensure number of args is correct. | ||
1941 | + if (g->isSet) { | ||
1942 | + if ((g->expectArgs != 0) && g->args.empty()) { | ||
1943 | + badOptions.push_back(*g->flags[0]); | ||
1944 | + continue; | ||
1945 | + } | ||
1946 | + | ||
1947 | + for(j=0; j < g->args.size(); ++j) { | ||
1948 | + if ((g->expectArgs != -1) && (g->expectArgs != g->args[j]->size())) | ||
1949 | + badOptions.push_back(*g->flags[0]); | ||
1950 | + } | ||
1951 | + } | ||
1952 | + } | ||
1953 | + | ||
1954 | + return badOptions.empty(); | ||
1955 | +}; | ||
1956 | +/* ################################################################### */ | ||
1957 | +bool ezOptionParser::gotRequired(std::vector<std::string> & badOptions) { | ||
1958 | + int i; | ||
1959 | + | ||
1960 | + for(i=0; i < groups.size(); ++i) { | ||
1961 | + OptionGroup *g = groups[i]; | ||
1962 | + // Simple case when required but user never set it. | ||
1963 | + if (g->isRequired && (!g->isSet)) { | ||
1964 | + badOptions.push_back(*g->flags[0]); | ||
1965 | + continue; | ||
1966 | + } | ||
1967 | + } | ||
1968 | + | ||
1969 | + return badOptions.empty(); | ||
1970 | +}; | ||
1971 | +/* ################################################################### */ | ||
1972 | +bool ezOptionParser::gotValid(std::vector<std::string> & badOptions, std::vector<std::string> & badArgs) { | ||
1973 | + int groupid, validatorid; | ||
1974 | + std::map< int, int >::iterator it; | ||
1975 | + | ||
1976 | + for(it = groupValidators.begin(); it != groupValidators.end(); ++it) { | ||
1977 | + groupid = it->first; | ||
1978 | + validatorid = it->second; | ||
1979 | + if (validatorid < 0) continue; | ||
1980 | + | ||
1981 | + OptionGroup *g = groups[groupid]; | ||
1982 | + ezOptionValidator *v = validators[validatorid]; | ||
1983 | + bool nextgroup = false; | ||
1984 | + | ||
1985 | + for (int i = 0; i < g->args.size(); ++i) { | ||
1986 | + if (nextgroup) break; | ||
1987 | + std::vector< std::string* > * args = g->args[i]; | ||
1988 | + for (int j = 0; j < args->size(); ++j) { | ||
1989 | + if (!v->isValid(args->at(j))) { | ||
1990 | + badOptions.push_back(*g->flags[0]); | ||
1991 | + badArgs.push_back(*args->at(j)); | ||
1992 | + nextgroup = true; | ||
1993 | + break; | ||
1994 | + } | ||
1995 | + } | ||
1996 | + } | ||
1997 | + } | ||
1998 | + | ||
1999 | + return badOptions.empty(); | ||
2000 | +}; | ||
2001 | +/* ################################################################### */ | ||
2002 | +void ezOptionParser::parse(int argc, const char * argv[]) { | ||
2003 | + if (argc < 1) return; | ||
2004 | + | ||
2005 | + /* | ||
2006 | + std::map<std::string,int>::iterator it; | ||
2007 | + for ( it=optionGroupIds.begin() ; it != optionGroupIds.end(); it++ ) | ||
2008 | + std::cout << (*it).first << " => " << (*it).second << std::endl; | ||
2009 | + */ | ||
2010 | + | ||
2011 | + int found=0, i, k, firstOptIndex=0, lastOptIndex=0; | ||
2012 | + std::string s; | ||
2013 | + OptionGroup *g; | ||
2014 | + | ||
2015 | + for(i=0; i < argc; ++i) { | ||
2016 | + s = argv[i]; | ||
2017 | + | ||
2018 | + if (optionGroupIds.count(s)) | ||
2019 | + break; | ||
2020 | + } | ||
2021 | + | ||
2022 | + firstOptIndex = i; | ||
2023 | + | ||
2024 | + if (firstOptIndex == argc) { | ||
2025 | + // No flags encountered, so set last args. | ||
2026 | + this->firstArgs.push_back(new std::string(argv[0])); | ||
2027 | + | ||
2028 | + for(k=1; k < argc; ++k) | ||
2029 | + this->lastArgs.push_back(new std::string(argv[k])); | ||
2030 | + | ||
2031 | + return; | ||
2032 | + } | ||
2033 | + | ||
2034 | + // Store initial args before opts appear. | ||
2035 | + for(k=0; k < i; ++k) { | ||
2036 | + this->firstArgs.push_back(new std::string(argv[k])); | ||
2037 | + } | ||
2038 | + | ||
2039 | + for(; i < argc; ++i) { | ||
2040 | + s = argv[i]; | ||
2041 | + | ||
2042 | + if (optionGroupIds.count(s)) { | ||
2043 | + k = optionGroupIds[s]; | ||
2044 | + g = groups[k]; | ||
2045 | + g->isSet = 1; | ||
2046 | + g->parseIndex.push_back(i); | ||
2047 | + | ||
2048 | + if (g->expectArgs) { | ||
2049 | + // Read ahead to get args. | ||
2050 | + ++i; | ||
2051 | + if (i >= argc) return; | ||
2052 | + g->args.push_back(new std::vector<std::string*>); | ||
2053 | + SplitDelim(argv[i], g->delim, g->args.back()); | ||
2054 | + } | ||
2055 | + lastOptIndex = i; | ||
2056 | + } | ||
2057 | + } | ||
2058 | + | ||
2059 | + // Scan for unknown opts/arguments. | ||
2060 | + for(i=firstOptIndex; i <= lastOptIndex; ++i) { | ||
2061 | + s = argv[i]; | ||
2062 | + | ||
2063 | + if (optionGroupIds.count(s)) { | ||
2064 | + k = optionGroupIds[s]; | ||
2065 | + g = groups[k]; | ||
2066 | + if (g->expectArgs) { | ||
2067 | + // Read ahead for args and skip them. | ||
2068 | + ++i; | ||
2069 | + } | ||
2070 | + } else { | ||
2071 | + unknownArgs.push_back(new std::string(argv[i])); | ||
2072 | + } | ||
2073 | + } | ||
2074 | + | ||
2075 | + if ( lastOptIndex >= (argc-1) ) return; | ||
2076 | + | ||
2077 | + // Store final args without flags. | ||
2078 | + for(k=lastOptIndex + 1; k < argc; ++k) { | ||
2079 | + this->lastArgs.push_back(new std::string(argv[k])); | ||
2080 | + } | ||
2081 | +}; | ||
2082 | +/* ################################################################### */ | ||
2083 | +void ezOptionParser::prettyPrint(std::string & out) { | ||
2084 | + char tmp[256]; | ||
2085 | + int i,j,k; | ||
2086 | + | ||
2087 | + out += "First Args:\n"; | ||
2088 | + for(i=0; i < firstArgs.size(); ++i) { | ||
2089 | + sprintf(tmp, "%d: %s\n", i+1, firstArgs[i]->c_str()); | ||
2090 | + out += tmp; | ||
2091 | + } | ||
2092 | + | ||
2093 | + // Sort the option flag names. | ||
2094 | + int n = groups.size(); | ||
2095 | + std::vector<std::string* > stringPtrs(n); | ||
2096 | + for(i=0; i < n; ++i) { | ||
2097 | + stringPtrs[i] = groups[i]->flags[0]; | ||
2098 | + } | ||
2099 | + | ||
2100 | + // Sort first flag of each group with other groups. | ||
2101 | + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr); | ||
2102 | + | ||
2103 | + out += "\nOptions:\n"; | ||
2104 | + OptionGroup *g; | ||
2105 | + for(i=0; i < n; ++i) { | ||
2106 | + g = get(stringPtrs[i]->c_str()); | ||
2107 | + out += "\n"; | ||
2108 | + // The flag names: | ||
2109 | + for(j=0; j < g->flags.size()-1; ++j) { | ||
2110 | + sprintf(tmp, "%s, ", g->flags[j]->c_str()); | ||
2111 | + out += tmp; | ||
2112 | + } | ||
2113 | + sprintf(tmp, "%s:\n", g->flags.back()->c_str()); | ||
2114 | + out += tmp; | ||
2115 | + | ||
2116 | + if (g->isSet) { | ||
2117 | + if (g->expectArgs) { | ||
2118 | + if (g->args.empty()) { | ||
2119 | + sprintf(tmp, "%s (default)\n", g->defaults.c_str()); | ||
2120 | + out += tmp; | ||
2121 | + } else { | ||
2122 | + for(k=0; k < g->args.size(); ++k) { | ||
2123 | + for(j=0; j < g->args[k]->size()-1; ++j) { | ||
2124 | + sprintf(tmp, "%s%c", g->args[k]->at(j)->c_str(), g->delim); | ||
2125 | + out += tmp; | ||
2126 | + } | ||
2127 | + sprintf(tmp, "%s\n", g->args[k]->back()->c_str(), g->delim); | ||
2128 | + out += tmp; | ||
2129 | + } | ||
2130 | + } | ||
2131 | + } else { // Set but no args expected. | ||
2132 | + sprintf(tmp, "Set\n"); | ||
2133 | + out += tmp; | ||
2134 | + } | ||
2135 | + } else { | ||
2136 | + sprintf(tmp, "Not set\n"); | ||
2137 | + out += tmp; | ||
2138 | + } | ||
2139 | + } | ||
2140 | + | ||
2141 | + out += "\nLast Args:\n"; | ||
2142 | + for(i=0; i < lastArgs.size(); ++i) { | ||
2143 | + sprintf(tmp, "%d: %s\n", i+1, lastArgs[i]->c_str()); | ||
2144 | + out += tmp; | ||
2145 | + } | ||
2146 | + | ||
2147 | + out += "\nUnknown Args:\n"; | ||
2148 | + for(i=0; i < unknownArgs.size(); ++i) { | ||
2149 | + sprintf(tmp, "%d: %s\n", i+1, unknownArgs[i]->c_str()); | ||
2150 | + out += tmp; | ||
2151 | + } | ||
2152 | +}; | ||
2153 | +} | ||
2154 | +/* ################################################################### */ | ||
2155 | +#endif /* EZ_OPTION_PARSER_H */ |
morfeusz/java/pl/waw/ipipan/morfeusz/MorfeuszProcessorType.java
0 → 100644
1 | +/* ---------------------------------------------------------------------------- | ||
2 | + * This file was automatically generated by SWIG (http://www.swig.org). | ||
3 | + * Version 2.0.10 | ||
4 | + * | ||
5 | + * Do not make changes to this file unless you know what you are doing--modify | ||
6 | + * the SWIG interface file instead. | ||
7 | + * ----------------------------------------------------------------------------- */ | ||
8 | + | ||
9 | +package pl.waw.ipipan.morfeusz; | ||
10 | + | ||
/**
 * SWIG-generated enum with the constants GENERATOR and ANALYZER.
 *
 * Each constant carries an integer {@code swigValue}. Constants created
 * through the no-argument constructor take consecutive values from the
 * {@link SwigNext} counter, which starts at 0.
 */
public enum MorfeuszProcessorType {
  GENERATOR,
  ANALYZER;

  /** Returns the integer value associated with this constant. */
  public final int swigValue() {
    return swigValue;
  }

  /**
   * Maps an integer value back to the enum constant that carries it.
   *
   * @param swigValue the integer value to look up
   * @return the constant whose {@code swigValue} equals the argument
   * @throws IllegalArgumentException if no constant has that value
   */
  public static MorfeuszProcessorType swigToEnum(int swigValue) {
    MorfeuszProcessorType[] swigValues = MorfeuszProcessorType.class.getEnumConstants();
    // Fast path: the value is a valid array index and the constant at that index carries the same value.
    if (swigValue < swigValues.length && swigValue >= 0 && swigValues[swigValue].swigValue == swigValue)
      return swigValues[swigValue];
    // Fallback: linear search over all constants.
    for (MorfeuszProcessorType swigEnum : swigValues)
      if (swigEnum.swigValue == swigValue)
        return swigEnum;
    throw new IllegalArgumentException("No enum " + MorfeuszProcessorType.class + " with value " + swigValue);
  }

  /** Assigns the next value of the shared counter and advances the counter. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType() {
    this.swigValue = SwigNext.next++;
  }

  /** Assigns an explicit value; the shared counter continues after it. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType(int swigValue) {
    this.swigValue = swigValue;
    SwigNext.next = swigValue+1;
  }

  /** Copies the value of another constant; the shared counter continues after it. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType(MorfeuszProcessorType swigEnum) {
    this.swigValue = swigEnum.swigValue;
    SwigNext.next = this.swigValue+1;
  }

  // Integer value associated with this constant.
  private final int swigValue;

  // Holder for the counter that supplies values to the constructors above.
  private static class SwigNext {
    private static int next = 0;
  }
}
52 | + |
morfeusz/morfeusz_analyzer.cpp
@@ -8,18 +8,96 @@ | @@ -8,18 +8,96 @@ | ||
8 | #include <cstdlib> | 8 | #include <cstdlib> |
9 | #include <iostream> | 9 | #include <iostream> |
10 | #include <vector> | 10 | #include <vector> |
11 | +#include <map> | ||
11 | #include "fsa/fsa.hpp" | 12 | #include "fsa/fsa.hpp" |
12 | #include "Tagset.hpp" | 13 | #include "Tagset.hpp" |
13 | #include "Morfeusz.hpp" | 14 | #include "Morfeusz.hpp" |
14 | #include "const.hpp" | 15 | #include "const.hpp" |
15 | 16 | ||
17 | +#include "cli/cli.hpp" | ||
18 | + | ||
16 | using namespace std; | 19 | using namespace std; |
20 | +using namespace ez; | ||
21 | + | ||
22 | +int main(int argc, const char** argv) { | ||
23 | + | ||
24 | + ezOptionParser opt; | ||
25 | + | ||
26 | + opt.overview = "Morfeusz analyzer"; | ||
27 | + opt.syntax = string(argv[0]) + " [OPTIONS]"; | ||
28 | + opt.example = string(argv[0]) + " --aggl strict --praet split --input /path/to/file.fsa"; | ||
29 | + // opt.footer = "Morfeusz Copyright (C) 2014\n"; | ||
30 | + | ||
31 | + opt.add( | ||
32 | + "", // Default. | ||
33 | + 0, // Required? | ||
34 | + 0, // Number of args expected. | ||
35 | + 0, // Delimiter if expecting multiple args. | ||
36 | + "Display usage instructions.", // Help description. | ||
37 | + "-h", // Flag token. | ||
38 | + "-help", // Flag token. | ||
39 | + "--help", // Flag token. | ||
40 | + "--usage" // Flag token. | ||
41 | + ); | ||
42 | + | ||
43 | + opt.add( | ||
44 | + "", // Default. | ||
45 | + 0, // Required? | ||
46 | + 1, // Number of args expected. | ||
47 | + 0, // Delimiter if expecting multiple args. | ||
48 | + "file with analyzer finite state automaton and data, created with buildfsa.py script.", // Help description. | ||
49 | + "-i", // Flag token. | ||
50 | + "-input", // Flag token. | ||
51 | + "--input" // Flag token. | ||
52 | + ); | ||
53 | + | ||
54 | + opt.add( | ||
55 | + "", // Default. | ||
56 | + 0, // Required? | ||
57 | + 1, // Number of args expected. | ||
58 | + 0, // Delimiter if expecting multiple args. | ||
59 | + "aggl option.", // Help description. | ||
60 | + "-a", // Flag token. | ||
61 | + "-aggl", // Flag token. | ||
62 | + "--aggl" // Flag token. | ||
63 | + ); | ||
64 | + | ||
65 | + opt.add( | ||
66 | + "", // Default. | ||
67 | + 0, // Required? | ||
68 | + 1, // Number of args expected. | ||
69 | + 0, // Delimiter if expecting multiple args. | ||
70 | + "praet option.", // Help description. | ||
71 | + "-p", // Flag token. | ||
72 | + "-praet", // Flag token. | ||
73 | + "--praet" // Flag token. | ||
74 | + ); | ||
75 | + | ||
76 | + opt.parse(argc, argv); | ||
77 | + | ||
78 | + if (opt.isSet("-h")) { | ||
79 | + printCLIUsage(opt, cout); | ||
80 | + return 0; | ||
81 | + } | ||
17 | 82 | ||
18 | -int main(int argc, char** argv) { | ||
19 | Morfeusz morfeusz; | 83 | Morfeusz morfeusz; |
20 | - if (argc > 1) { | ||
21 | - morfeusz.setAnalyzerFile(argv[1]); | ||
22 | - printf("Using dictionary from %s\n", argv[1]); | 84 | + if (opt.isSet("-i")) { |
85 | + string analyzerFile; | ||
86 | + opt.get("-i")->getString(analyzerFile); | ||
87 | + morfeusz.setAnalyzerFile(analyzerFile); | ||
88 | + printf("Using dictionary from %s\n", analyzerFile.c_str()); | ||
89 | + } | ||
90 | + if (opt.isSet("-a")) { | ||
91 | + string aggl; | ||
92 | + opt.get("-a")->getString(aggl); | ||
93 | + cerr << "setting aggl option to " << aggl << endl; | ||
94 | + morfeusz.setAggl(aggl); | ||
95 | + } | ||
96 | + if (opt.isSet("-p")) { | ||
97 | + string praet; | ||
98 | + opt.get("-p")->getString(praet); | ||
99 | + cerr << "setting praet option to " << praet << endl; | ||
100 | + morfeusz.setPraet(praet); | ||
23 | } | 101 | } |
24 | #ifdef _WIN32 | 102 | #ifdef _WIN32 |
25 | morfeusz.setCharset(CP852); | 103 | morfeusz.setCharset(CP852); |
@@ -37,10 +115,11 @@ int main(int argc, char** argv) { | @@ -37,10 +115,11 @@ int main(int argc, char** argv) { | ||
37 | printf("["); | 115 | printf("["); |
38 | for (unsigned int i = 0; i < res.size(); i++) { | 116 | for (unsigned int i = 0; i < res.size(); i++) { |
39 | MorphInterpretation& mi = res[i]; | 117 | MorphInterpretation& mi = res[i]; |
40 | - if (prevStart != -1 | 118 | + if (prevStart != -1 |
41 | && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) { | 119 | && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) { |
42 | printf("]\n["); | 120 | printf("]\n["); |
43 | - } else if (prevStart != -1) { | 121 | + } |
122 | + else if (prevStart != -1) { | ||
44 | printf("; "); | 123 | printf("; "); |
45 | } | 124 | } |
46 | printf("%d,%d,%s,%s,%s,%s", | 125 | printf("%d,%d,%s,%s,%s,%s", |
morfeusz/segrules/segrules.cpp
@@ -28,8 +28,12 @@ static inline const unsigned char* getFSAsMapPtr(const unsigned char* ptr) { | @@ -28,8 +28,12 @@ static inline const unsigned char* getFSAsMapPtr(const unsigned char* ptr) { | ||
28 | 28 | ||
29 | static inline SegrulesOptions deserializeOptions(const unsigned char*& ptr) { | 29 | static inline SegrulesOptions deserializeOptions(const unsigned char*& ptr) { |
30 | SegrulesOptions res; | 30 | SegrulesOptions res; |
31 | - res["aggl"] = deserializeString(ptr); | ||
32 | - res["praet"] = deserializeString(ptr); | 31 | + unsigned char optsNum = *ptr; |
32 | + ptr++; | ||
33 | + for (unsigned char i = 0; i < optsNum; i++) { | ||
34 | + string key = deserializeString(ptr); | ||
35 | + res[key] = deserializeString(ptr); | ||
36 | + } | ||
33 | return res; | 37 | return res; |
34 | } | 38 | } |
35 | 39 | ||
@@ -54,3 +58,35 @@ map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* an | @@ -54,3 +58,35 @@ map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* an | ||
54 | } | 58 | } |
55 | return res; | 59 | return res; |
56 | } | 60 | } |
61 | + | ||
62 | +SegrulesOptions getDefaultSegrulesOptions(const unsigned char* ptr) { | ||
63 | + const unsigned char* fsasMapPtr = getFSAsMapPtr(ptr); | ||
64 | + const unsigned char* currPtr = fsasMapPtr; | ||
65 | + unsigned char fsasNum = *currPtr; | ||
66 | + currPtr++; | ||
67 | + for (unsigned char i = 0; i < fsasNum; i++) { | ||
68 | + deserializeOptions(currPtr); | ||
69 | + deserializeFSA(currPtr); | ||
70 | + } | ||
71 | + return deserializeOptions(currPtr); | ||
72 | +} | ||
73 | + | ||
74 | +SegrulesFSA* getDefaultSegrulesFSA( | ||
75 | + const map<SegrulesOptions, SegrulesFSA*>& map, | ||
76 | + const unsigned char* ptr) { | ||
77 | + SegrulesOptions opts = getDefaultSegrulesOptions(ptr); | ||
78 | + return (*(map.find(opts))).second; | ||
79 | +} | ||
80 | + | ||
81 | +void debugMap(const map<SegrulesOptions, SegrulesFSA*>& res) { | ||
82 | + map<SegrulesOptions, SegrulesFSA*>::const_iterator it = res.begin(); | ||
83 | + while (it != res.end()) { | ||
84 | + SegrulesOptions::const_iterator it1 = it->first.begin(); | ||
85 | + while (it1 != it->first.end()) { | ||
86 | + cerr << it1->first << " --> " << it1->second << endl; | ||
87 | + it1++; | ||
88 | + } | ||
89 | + cerr << it->second << endl; | ||
90 | + it++; | ||
91 | + } | ||
92 | +} |
morfeusz/segrules/segrules.hpp
@@ -18,6 +18,9 @@ typedef std::map<std::string, std::string> SegrulesOptions; | @@ -18,6 +18,9 @@ typedef std::map<std::string, std::string> SegrulesOptions; | ||
18 | //typedef FSA<unsigned char> SegrulesFSAType; | 18 | //typedef FSA<unsigned char> SegrulesFSAType; |
19 | 19 | ||
20 | std::map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* analyzerPtr); | 20 | std::map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* analyzerPtr); |
21 | +SegrulesOptions getDefaultSegrulesOptions(const unsigned char* ptr); | ||
22 | +SegrulesFSA* getDefaultSegrulesFSA(const map<SegrulesOptions, SegrulesFSA*>& map, const unsigned char* analyzerPtr); | ||
23 | +void debugMap(const map<SegrulesOptions, SegrulesFSA*>& res); | ||
21 | 24 | ||
22 | #endif /* SEGRULES_HPP */ | 25 | #endif /* SEGRULES_HPP */ |
23 | 26 |
nbproject/configurations.xml
@@ -279,7 +279,7 @@ | @@ -279,7 +279,7 @@ | ||
279 | <ccTool> | 279 | <ccTool> |
280 | <incDir> | 280 | <incDir> |
281 | <pElem>morfeusz</pElem> | 281 | <pElem>morfeusz</pElem> |
282 | - <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | 282 | + <pElem>/usr/lib/jvm/default-java/include</pElem> |
283 | </incDir> | 283 | </incDir> |
284 | <preprocessorList> | 284 | <preprocessorList> |
285 | <Elem>libjmorfeusz_EXPORTS</Elem> | 285 | <Elem>libjmorfeusz_EXPORTS</Elem> |