Commit 9cacc587215ba16ccaea4e7ff865ad40a36596ac
1 parent
f1e52ff4
- dodanie opcji aggl i praet
- dodanie obsługi CLI w morfeusz_analyzer
- ogólne ogarnięcie generatora

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@115 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 21 changed files with 2690 additions and 275 deletions
CMakeLists.txt
... | ... | @@ -5,7 +5,7 @@ project (Morfeusz) |
5 | 5 | set (Morfeusz_VERSION_MAJOR 2) |
6 | 6 | set (Morfeusz_VERSION_MINOR 0) |
7 | 7 | set (Morfeusz_VERSION_PATCH 0) |
8 | -set (CMAKE_BUILD_TYPE "Release") | |
8 | +set (CMAKE_BUILD_TYPE "Debug") | |
9 | 9 | |
10 | 10 | enable_testing() |
11 | 11 | |
... | ... | @@ -36,7 +36,7 @@ if ("${INPUT_DICTIONARIES}" STREQUAL "") |
36 | 36 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") |
37 | 37 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) |
38 | 38 | else () |
39 | - set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorf-0.6.7.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") | |
39 | + set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorfSmall.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") | |
40 | 40 | endif () |
41 | 41 | endif () |
42 | 42 | |
... | ... | @@ -109,7 +109,7 @@ set (CPACK_OUTPUT_FILE_PREFIX "${TARGET_DIR}") |
109 | 109 | if (${CMAKE_SYSTEM_NAME} MATCHES "Linux") |
110 | 110 | set (CPACK_GENERATOR "DEB" "TGZ") |
111 | 111 | #debian |
112 | - set (CPACK_DEBIAN_PACKAGE_NAME "morfeusz") | |
112 | + set (CPACK_DEBIAN_PACKAGE_NAME "morfeusz2") | |
113 | 113 | set (CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_CONTACT}") |
114 | 114 | set (CPACK_DEBIAN_PACKAGE_DEPENDS "libstdc++6 (>= 4.6)") |
115 | 115 | set (CPACK_DEBIAN_PACKAGE_ARCHITECTURE "${ARCHITECTURE}") |
... | ... |
fsabuilder/buildfsa.py
... | ... | @@ -261,9 +261,11 @@ def main(opts): |
261 | 261 | if __name__ == '__main__': |
262 | 262 | import os |
263 | 263 | opts = _parseOptions() |
264 | -# try: | |
265 | - main(opts) | |
264 | + try: | |
265 | + main(opts) | |
266 | 266 | # except Exception as ex: |
267 | -# raise ex | |
268 | 267 | # print >> sys.stderr, unicode(ex).encode('utf8') |
268 | +# sys.exit(1) | |
269 | + finally: | |
270 | + pass | |
269 | 271 | |
... | ... |
fsabuilder/morfeuszbuilder/fsa/encode.py
... | ... | @@ -26,7 +26,6 @@ class Encoder(object): |
26 | 26 | |
27 | 27 | def encodeData(self, data): |
28 | 28 | raise NotImplementedError() |
29 | -# return bytearray(u'|'.join(data).encode(self.encoding)) + bytearray([0]) | |
30 | 29 | |
31 | 30 | def decodeData(self, rawData): |
32 | 31 | return NotImplementedError() |
... | ... | @@ -134,16 +133,8 @@ class Encoder(object): |
134 | 133 | |
135 | 134 | for typenum, interpsList in segnum2Interps.iteritems(): |
136 | 135 | res.extend(self._encodeInterps4Type(typenum, interpsList, withCasePattern, withPrefix)) |
137 | - | |
138 | - | |
139 | -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()): | |
140 | -# encodedInterpsList.extend(self._encodeTypeNum(interp.typenum)) | |
141 | -# encodedInterpsList.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=withCasePattern, withPrefix=withPrefix)) | |
142 | -# encodedInterpsList.extend(self._encodeTagNum(interp.tagnum)) | |
143 | -# encodedInterpsList.extend(self._encodeNameNum(interp.namenum)) | |
144 | 136 | del interpsList |
145 | -# res.extend(serializationUtils.htons(len(encodedInterpsList))) | |
146 | -# res.extend(encodedInterpsList) | |
137 | + | |
147 | 138 | return res |
148 | 139 | |
149 | 140 | class MorphEncoder(Encoder): |
... | ... | @@ -156,19 +147,6 @@ class MorphEncoder(Encoder): |
156 | 147 | |
157 | 148 | def encodeData(self, interpsList): |
158 | 149 | return self._doEncodeData(interpsList, withCasePattern=True, withPrefix=False) |
159 | -# res = bytearray() | |
160 | -# firstByte = len(interpsList) | |
161 | -# assert firstByte < 256 | |
162 | -# assert firstByte > 0 | |
163 | -# res.append(firstByte) | |
164 | -# assert type(interpsList) == frozenset | |
165 | -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()): | |
166 | -# res.extend(self._encodeTypeNum(interp.typenum)) | |
167 | -# res.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=True, withPrefix=False)) | |
168 | -# res.extend(self._encodeTagNum(interp.tagnum)) | |
169 | -# res.extend(self._encodeNameNum(interp.namenum)) | |
170 | -# del interpsList | |
171 | -# return res | |
172 | 150 | |
173 | 151 | class Encoder4Generator(Encoder): |
174 | 152 | |
... | ... | @@ -177,18 +155,3 @@ class Encoder4Generator(Encoder): |
177 | 155 | |
178 | 156 | def encodeData(self, interpsList): |
179 | 157 | return self._doEncodeData(interpsList, withCasePattern=False, withPrefix=True) |
180 | -# res = bytearray() | |
181 | -# firstByte = len(interpsList) | |
182 | -# assert firstByte < 256 | |
183 | -# assert firstByte > 0 | |
184 | -# res.append(firstByte) | |
185 | -# assert type(interpsList) == frozenset | |
186 | -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()): | |
187 | -# res.extend(self._encodeTypeNum(interp.typenum)) | |
188 | -# res.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=False, withPrefix=True)) | |
189 | -# res.extend(self._encodeTagNum(interp.tagnum)) | |
190 | -# res.extend(self._encodeNameNum(interp.namenum)) | |
191 | -# return res | |
192 | -# | |
193 | -# def decodeData(self, data): | |
194 | -# | |
... | ... |
fsabuilder/morfeuszbuilder/segrules/rulesManager.py
... | ... | @@ -11,6 +11,7 @@ class RulesManager(object): |
11 | 11 | def __init__(self, segtypes): |
12 | 12 | self.options2DFA = {} |
13 | 13 | self.segtypes = segtypes |
14 | + self.defaultOptions = None | |
14 | 15 | |
15 | 16 | def _options2Key(self, optionsMap): |
16 | 17 | return frozenset(optionsMap.items()) |
... | ... | @@ -21,6 +22,9 @@ class RulesManager(object): |
21 | 22 | def getDFA(self, optionsMap): |
22 | 23 | return self.options2DFA[self._options2Key(optionsMap)] |
23 | 24 | |
25 | + def setDefaultOptions(self, key2Def): | |
26 | + self.defaultOptions = key2Def | |
27 | + | |
24 | 28 | def addDFA(self, optionsMap, dfa): |
25 | 29 | self.options2DFA[self._options2Key(optionsMap)] = dfa |
26 | 30 | |
... | ... | @@ -40,13 +44,17 @@ class RulesManager(object): |
40 | 44 | optionsMap = self._key2Options(key) |
41 | 45 | res.extend(self._serializeOptionsMap(optionsMap)) |
42 | 46 | res.extend(self._serializeDFA(dfa)) |
47 | + res.extend(self._serializeOptionsMap(self.defaultOptions)) | |
43 | 48 | logging.info('segmentation rules size: %s bytes', len(res)) |
44 | 49 | return res |
45 | 50 | |
46 | 51 | def _serializeOptionsMap(self, optionsMap): |
47 | 52 | assert len(optionsMap) < 256 |
48 | 53 | res = bytearray() |
54 | + res.append(2) | |
55 | + res.extend(self._serializeString('aggl')) | |
49 | 56 | res.extend(self._serializeString(optionsMap['aggl'])) |
57 | + res.extend(self._serializeString('praet')) | |
50 | 58 | res.extend(self._serializeString(optionsMap['praet'])) |
51 | 59 | return res |
52 | 60 | |
... | ... | @@ -63,4 +71,4 @@ class RulesManager(object): |
63 | 71 | # res.append(len(string)) |
64 | 72 | res.extend(string.encode('utf8')) |
65 | 73 | res.append(0) |
66 | - return res | |
67 | 74 | \ No newline at end of file |
75 | + return res | |
... | ... |
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
... | ... | @@ -46,14 +46,14 @@ class RulesParser(object): |
46 | 46 | def2Key[define] = key |
47 | 47 | |
48 | 48 | firstNFA = None |
49 | - for defs in itertools.product(*key2Defs.values()): | |
49 | + for idx, defs in enumerate(itertools.product(*key2Defs.values())): | |
50 | 50 | key2Def = dict([(def2Key[define], define) for define in defs]) |
51 | 51 | # print key2Def |
52 | 52 | nfa = rulesNFA.RulesNFA() |
53 | 53 | if not firstNFA: |
54 | 54 | firstNFA = nfa |
55 | 55 | section = 'combinations' if self.rulesType == RulesParser.PARSE4ANALYZER else 'generator combinations' |
56 | - combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(section) | |
56 | + combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(section, ignoreComments=False) | |
57 | 57 | combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs, filename)) |
58 | 58 | for rule in self._doParse(combinationEnumeratedLines, segtypesHelper, filename): |
59 | 59 | # print rule |
... | ... | @@ -64,6 +64,8 @@ class RulesParser(object): |
64 | 64 | # dfa.debug() |
65 | 65 | # print dfa.tryToRecognize(bytearray([14])) |
66 | 66 | res.addDFA(key2Def, dfa) |
67 | + if idx == 0: | |
68 | + res.setDefaultOptions(key2Def) | |
67 | 69 | return res |
68 | 70 | |
69 | 71 | def _doParse(self, combinationEnumeratedLines, segtypesHelper, filename): |
... | ... |
fsabuilder/morfeuszbuilder/utils/configFile.py
... | ... | @@ -46,8 +46,13 @@ class ConfigFile(object): |
46 | 46 | else: |
47 | 47 | return None |
48 | 48 | |
49 | - def enumerateLinesInSection(self, sectionName): | |
50 | - return self.section2Lines[sectionName] | |
49 | + def enumerateLinesInSection(self, sectionName, ignoreComments=True): | |
50 | + if sectionName not in self.section2Lines: | |
51 | + raise exceptions.ConfigFileException(self.filename, None, u'Missing section: "%s"' % sectionName) | |
52 | + if not ignoreComments: | |
53 | + return self.section2Lines[sectionName] | |
54 | + else: | |
55 | + return [(linenum, line) for (linenum, line) in self.section2Lines[sectionName] if not line.startswith('#')] | |
51 | 56 | |
52 | 57 | def _parse(self): |
53 | 58 | with codecs.open(self.filename, 'r', 'utf8') as f: |
... | ... |
fsabuilder/morfeuszbuilder/utils/exceptions.py
... | ... | @@ -31,4 +31,7 @@ class ConfigFileException(FSABuilderException): |
31 | 31 | self.msg = msg |
32 | 32 | |
33 | 33 | def __str__(self): |
34 | - return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg) | |
34 | + if self.lineNum: | |
35 | + return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg) | |
36 | + else: | |
37 | + return u'%s - %s' % (self.filename, self.msg) | |
... | ... |
input/segmenty.dat
... | ... | @@ -3,6 +3,7 @@ aggl=permissive strict isolated |
3 | 3 | praet=split composite |
4 | 4 | |
5 | 5 | [combinations] |
6 | + | |
6 | 7 | #define wsz_interp (interp|kropka|dywiz)* |
7 | 8 | |
8 | 9 | #define moze_interp(segmenty) wsz_interp segmenty wsz_interp |
... | ... | @@ -19,7 +20,7 @@ samotny |
19 | 20 | # przeszlik pojedynczy w formie nieaglutynacyjnej, np. „gniótł”: |
20 | 21 | moze_interp(praet_sg_na) |
21 | 22 | |
22 | -# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „moze”: | |
23 | +# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „czytał”: | |
23 | 24 | moze_interp(praet_sg) |
24 | 25 | |
25 | 26 | # przeszlik mnogi, np. „czytali”: |
... | ... | @@ -69,7 +70,7 @@ moze_interp(praet_sg by aglsg) |
69 | 70 | # np. „gnietli·by·śmy” |
70 | 71 | moze_interp(praet_pl by aglpl) |
71 | 72 | #else |
72 | -# moze_interp(praetcond) | |
73 | +#moze_interp(praetcond) | |
73 | 74 | #endif |
74 | 75 | # np. „by·ś” |
75 | 76 | moze_interp(by aglsg) |
... | ... | @@ -97,13 +98,13 @@ moze_interp( (adja dywiz)+ adj ) |
97 | 98 | # adja dywiz adja dywiz adja dywiz adj interp? |
98 | 99 | # adja dywiz adja dywiz adja dywiz adja dywiz adj interp? |
99 | 100 | |
100 | -# Formy zanegowane stopnia wyższego przymiotników i przysłówków (WK) | |
101 | -# np. „nie·grzeczniejszy”, „nie·grzeczniej” | |
102 | -moze_interp( nie> adj_com ) | |
101 | +# Stopień najwyższy: | |
102 | +# np. „naj·zieleńszy”, „naj·mądrzej” | |
103 | +moze_interp( naj> adj_sup ) | |
103 | 104 | |
104 | 105 | # Formy „zanegowane” gerundiów i imiesłowów: |
105 | 106 | # np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”: |
106 | -moze_interp( nie> negat ) | |
107 | +moze_interp( nie > negat ) | |
107 | 108 | |
108 | 109 | # Przyimki akceptujące krótką formę „-ń” |
109 | 110 | moze_interp(z_on_agl) |
... | ... | @@ -115,17 +116,11 @@ moze_interp( dig>* dig ) |
115 | 116 | |
116 | 117 | # Formacje prefiksalne |
117 | 118 | #### trzeba wydzielić odpowiednie samodze! |
118 | -# rzeczownikowe | |
119 | -# np. „euro·sodoma”, „e-·papieros” | |
120 | -moze_interp(nomina) | |
121 | -moze_interp( prefs> nomina ) | |
119 | +# rzeczownikowe i przymiotnikowe | |
120 | +# np. „euro·sodoma”, „e-·papieros”, „euro·sodomski”, „bez·argumentowy” | |
121 | +moze_interp( prefs> samodz ) | |
122 | 122 | # czasownikowe np. „po·nakapywać” |
123 | -moze_interp(verba_imperf) | |
124 | -moze_interp( prefv> verba_imperf ) | |
125 | -# przymiotnikowe np. „do·żylny”, „euro·sodomski”, „bez·argumentowy” | |
126 | -moze_interp(adjectiva) | |
127 | -moze_interp(prefa> adj) | |
128 | -moze_interp( prefa> adjectiva ) | |
123 | +moze_interp( prefv> samodz ) | |
129 | 124 | |
130 | 125 | # Apozycje z dywizem |
131 | 126 | # np. „kobieta-prezydent” |
... | ... | @@ -138,26 +133,12 @@ adj dywiz samodz |
138 | 133 | # ? |
139 | 134 | samodz dywiz adj |
140 | 135 | |
141 | -#### PONIŻEJ REGUŁY WK | |
142 | -# Stopień najwyższy: | |
143 | -# np. „naj·zieleńszy”, „naj·mądrzej” | |
144 | -moze_interp( naj> adj_sup ) | |
145 | -moze_interp( nie> naj> adj_sup ) | |
146 | -# Cząstka li przy osobowych formach czasownika oddzielona dywizem: znasz-li ten kraj | |
147 | -moze_interp( praet_sg dywiz li) | |
148 | -moze_interp( praet_pl dywiz li) | |
149 | -moze_interp( praet_sg_na dywiz li) | |
150 | -moze_interp( fin dywiz li) | |
151 | - | |
152 | -# i bez dywizu --- czy bez dywizu jest sens to łapać? | |
153 | -#moze_interp( praet_sg li) | |
154 | -#moze_interp( praet_pl li) | |
155 | -#moze_interp( praet_sg_na li) | |
156 | -#moze_interp( fin li) | |
157 | - | |
158 | 136 | [generator combinations] |
159 | -prefs> nomina | |
160 | -nomina | |
137 | +adj | |
138 | +adj_sup | |
139 | +samodz | |
140 | +prefs> samodz | |
141 | +prefv> samodz | |
161 | 142 | |
162 | 143 | [segment types] |
163 | 144 | naj |
... | ... | @@ -193,45 +174,16 @@ dywiz |
193 | 174 | kropka |
194 | 175 | samodz |
195 | 176 | |
196 | -[lexemes] | |
197 | -z_aglt aby:comp | |
198 | -z_aglt bowiem:comp | |
199 | -by by:qub | |
200 | -li li:qub | |
201 | -z_aglt by:comp | |
202 | -z_aglt cóż:subst | |
203 | -z_aglt czemu:adv | |
204 | -z_aglt czyżby:qub | |
205 | -z_aglt choćby:comp | |
206 | -z_aglt chociażby:comp | |
207 | -z_aglt dlaczego:adv | |
208 | -z_aglt dopóki:comp | |
209 | -z_aglt dopóty:conj | |
210 | -z_aglt gdyby:comp | |
211 | -z_aglt gdzie:qub | |
212 | -z_aglt gdzie:adv | |
213 | -z_aglt jakby:comp | |
214 | -z_aglt jakoby:comp | |
215 | -z_aglt kiedy:adv | |
216 | -z_aglt kiedy:comp | |
217 | -z_aglt tylko:qub | |
218 | -z_aglt żeby:comp | |
219 | -dywiz -:interp | |
220 | -kropka .:interp | |
221 | - | |
222 | 177 | [tags] |
223 | 178 | naj naj |
224 | 179 | nie nie |
225 | 180 | prefs prefs |
226 | 181 | prefv prefv |
227 | -prefa prefa | |
228 | 182 | dig dig |
229 | 183 | adja adja |
230 | 184 | adj adj:%:pos |
231 | 185 | adj_sup adj:%:sup |
232 | 186 | adj_sup adv:sup |
233 | -adj_com adj:%:com | |
234 | -adj_com adj:%:com | |
235 | 187 | negat ger:%:neg |
236 | 188 | negat pact:%:neg |
237 | 189 | negat ppas:%:neg |
... | ... | @@ -243,22 +195,39 @@ samotny interj |
243 | 195 | interp interp |
244 | 196 | aglsg aglt:sg:% |
245 | 197 | aglpl aglt:pl:% |
198 | +#praetcond cond:% | |
199 | +#praetcond praet:%:pri:% | |
200 | +#praetcond praet:%:sec:% | |
201 | +#praetcond praet:%:ter:% | |
246 | 202 | praet_sg_agl praet:sg:%:agl |
247 | 203 | praet_sg_na praet:sg:%:nagl |
248 | 204 | praet_sg praet:sg:% |
249 | 205 | praet_pl praet:pl:% |
250 | 206 | praet_sg winien:sg:% |
251 | 207 | praet_pl winien:pl:% |
252 | -fin fin:% | |
253 | -nomina subst:% | |
254 | -nomina ger:% | |
255 | -nomina depr:% | |
256 | -adjectiva adv:% | |
257 | -adjectiva ppas:% | |
258 | -adjectiva pact:% | |
259 | -verba_imperf praet:%:imperf | |
260 | -verba_imperf fin:%:imperf | |
261 | -verba_imperf inf:imperf | |
262 | -verba_imperf imps:imperf | |
263 | -verba_imperf impt:%:imperf | |
264 | 208 | samodz % |
209 | + | |
210 | +[lexemes] | |
211 | +z_aglt aby:comp | |
212 | +z_aglt bowiem:comp | |
213 | +by by:qub | |
214 | +z_aglt by:comp | |
215 | +z_aglt cóż:subst | |
216 | +z_aglt czemu:adv | |
217 | +z_aglt czyżby:qub | |
218 | +z_aglt choćby:comp | |
219 | +z_aglt chociażby:comp | |
220 | +z_aglt dlaczego:adv | |
221 | +z_aglt dopóki:comp | |
222 | +z_aglt dopóty:conj | |
223 | +z_aglt gdyby:comp | |
224 | +z_aglt gdzie:qub | |
225 | +z_aglt gdzie:adv | |
226 | +z_aglt jakby:comp | |
227 | +z_aglt jakoby:comp | |
228 | +z_aglt kiedy:adv | |
229 | +z_aglt kiedy:comp | |
230 | +z_aglt tylko:qub | |
231 | +z_aglt żeby:comp | |
232 | +dywiz -:interp | |
233 | +kropka .:interp | |
... | ... |
morfeusz/CMakeLists.txt
1 | 1 | |
2 | + | |
2 | 3 | ########## generate default dictionary data ################# |
3 | 4 | add_custom_command ( |
4 | 5 | OUTPUT "${INPUT_DICTIONARY_CPP}" |
... | ... | @@ -57,7 +58,7 @@ set(INCLUDE_FILES |
57 | 58 | |
58 | 59 | add_library (libmorfeusz SHARED ${SRC_FILES}) |
59 | 60 | set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) |
60 | -set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz") | |
61 | +set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") | |
61 | 62 | |
62 | 63 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) |
63 | 64 | add_executable (morfeusz_generator morfeusz_generator.cpp) |
... | ... |
morfeusz/Environment.cpp
... | ... | @@ -21,13 +21,6 @@ static Deserializer<vector<InterpsGroup> >& initializeDeserializer(MorfeuszProce |
21 | 21 | return *(processorType == ANALYZER ? analyzerDeserializer : generatorDeserializer); |
22 | 22 | } |
23 | 23 | |
24 | -static SegrulesFSA* getDefaultSegrulesFSA(const map<SegrulesOptions, SegrulesFSA*>& map) { | |
25 | - SegrulesOptions opts; | |
26 | - opts["aggl"] = "isolated"; | |
27 | - opts["praet"] = "split"; | |
28 | - return (*(map.find(opts))).second; | |
29 | -} | |
30 | - | |
31 | 24 | static void deleteSegrulesFSAs(std::map<SegrulesOptions, SegrulesFSA*>& fsasMap) { |
32 | 25 | for ( |
33 | 26 | std::map<SegrulesOptions, SegrulesFSA*>::iterator it = fsasMap.begin(); |
... | ... | @@ -43,23 +36,23 @@ Environment::Environment( |
43 | 36 | MorfeuszProcessorType processorType, |
44 | 37 | const unsigned char* fsaFileStartPtr) |
45 | 38 | : currentCharsetConverter(getCharsetConverter(charset)), |
46 | - utf8CharsetConverter(), | |
47 | - isoCharsetConverter(), | |
48 | - cp1250CharsetConverter(), | |
49 | - cp852CharsetConverter(), | |
50 | - caseConverter(), | |
51 | - tagset(fsaFileStartPtr), | |
52 | - fsaFileStartPtr(fsaFileStartPtr), | |
53 | - fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))), | |
54 | - segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)), | |
55 | - currSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap)), | |
56 | - isFromFile(false), | |
57 | - chunksDecoder( | |
58 | - processorType == ANALYZER | |
59 | - ? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this) | |
60 | - : (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)), | |
61 | - processorType(processorType) | |
62 | - { | |
39 | +utf8CharsetConverter(), | |
40 | +isoCharsetConverter(), | |
41 | +cp1250CharsetConverter(), | |
42 | +cp852CharsetConverter(), | |
43 | +caseConverter(), | |
44 | +tagset(fsaFileStartPtr), | |
45 | +fsaFileStartPtr(fsaFileStartPtr), | |
46 | +fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))), | |
47 | +segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)), | |
48 | +currSegrulesOptions(getDefaultSegrulesOptions(fsaFileStartPtr)), | |
49 | +currSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap, fsaFileStartPtr)), | |
50 | +isFromFile(false), | |
51 | +chunksDecoder( | |
52 | +processorType == ANALYZER | |
53 | +? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this) | |
54 | +: (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)), | |
55 | +processorType(processorType) { | |
63 | 56 | } |
64 | 57 | |
65 | 58 | const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const { |
... | ... | @@ -129,3 +122,16 @@ const FSAType& Environment::getFSA() const { |
129 | 122 | const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const { |
130 | 123 | return *(this->chunksDecoder); |
131 | 124 | } |
125 | + | |
126 | +void Environment::setSegrulesOption(const std::string& option, const std::string& value) { | |
127 | + if (this->currSegrulesOptions.find(option) == this->currSegrulesOptions.end()) { | |
128 | + throw MorfeuszException("Invalid segmentation option '"+option+"'"); | |
129 | + } | |
130 | + SegrulesOptions prevOptions = this->currSegrulesOptions; | |
131 | + this->currSegrulesOptions[option] = value; | |
132 | + if (this->segrulesFSAsMap.find(this->currSegrulesOptions) == this->segrulesFSAsMap.end()) { | |
133 | + this->currSegrulesOptions = prevOptions; | |
134 | + throw MorfeuszException("Invalid '"+option+"' option value: '"+value+"'"); | |
135 | + } | |
136 | + this->currSegrulesFSA = this->segrulesFSAsMap.find(this->currSegrulesOptions)->second; | |
137 | +} | |
... | ... |
morfeusz/Environment.hpp
... | ... | @@ -41,6 +41,8 @@ public: |
41 | 41 | |
42 | 42 | void setFSAFile(const std::string& filename); |
43 | 43 | |
44 | + void setSegrulesOption(const std::string& option, const std::string& value); | |
45 | + | |
44 | 46 | const SegrulesFSA& getCurrentSegrulesFSA() const; |
45 | 47 | |
46 | 48 | const FSAType& getFSA() const; |
... | ... | @@ -60,6 +62,7 @@ private: |
60 | 62 | const unsigned char* fsaFileStartPtr; |
61 | 63 | const FSAType* fsa; |
62 | 64 | std::map<SegrulesOptions, SegrulesFSA*> segrulesFSAsMap; |
65 | + SegrulesOptions currSegrulesOptions; | |
63 | 66 | const SegrulesFSA* currSegrulesFSA; |
64 | 67 | bool isFromFile; |
65 | 68 | |
... | ... |
morfeusz/InterpretedChunksDecoder.hpp
... | ... | @@ -29,21 +29,40 @@ public: |
29 | 29 | : env(env) { |
30 | 30 | } |
31 | 31 | |
32 | + virtual ~InterpretedChunksDecoder() { | |
33 | + } | |
34 | + | |
32 | 35 | virtual void decode( |
33 | 36 | unsigned int startNode, |
34 | 37 | unsigned int endNode, |
35 | 38 | const InterpretedChunk& interpretedChunk, |
36 | 39 | std::vector<MorphInterpretation>& out) const = 0; |
37 | 40 | |
38 | - virtual ~InterpretedChunksDecoder() { | |
39 | - } | |
40 | - | |
41 | 41 | protected: |
42 | + | |
43 | + virtual MorphInterpretation decodeMorphInterpretation( | |
44 | + unsigned int startNode, unsigned int endNode, | |
45 | + const string& orth, | |
46 | + const string& lemmaPrefix, | |
47 | + const InterpretedChunk& chunk, | |
48 | + const unsigned char*& ptr) const = 0; | |
42 | 49 | |
43 | 50 | virtual void decodeForm( |
44 | 51 | const std::vector<uint32_t>& orth, |
45 | 52 | const EncodedForm& form, |
46 | 53 | std::string& res) const = 0; |
54 | + | |
55 | + EncodedInterpretation deserializeInterp(const unsigned char*& ptr) const { | |
56 | + EncodedInterpretation interp; | |
57 | + deserializeEncodedForm(ptr, interp.value); | |
58 | + interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr))); | |
59 | + ptr += 2; | |
60 | + interp.nameClassifier = *ptr; | |
61 | + ptr++; | |
62 | + return interp; | |
63 | + } | |
64 | + | |
65 | + virtual void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const = 0; | |
47 | 66 | |
48 | 67 | const Environment& env; |
49 | 68 | }; |
... | ... | @@ -53,7 +72,7 @@ public: |
53 | 72 | |
54 | 73 | InterpretedChunksDecoder4Analyzer(const Environment& env) : InterpretedChunksDecoder(env) { |
55 | 74 | } |
56 | - | |
75 | + | |
57 | 76 | void decode( |
58 | 77 | unsigned int startNode, |
59 | 78 | unsigned int endNode, |
... | ... | @@ -90,42 +109,12 @@ protected: |
90 | 109 | } |
91 | 110 | } |
92 | 111 | |
93 | -private: | |
94 | - | |
95 | - void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& originalForm, std::string& decodedForm) const { | |
96 | - for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | |
97 | - const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | |
98 | - originalForm += env.getCharsetConverter().toString(prefixChunk.originalCodepoints); | |
99 | - const unsigned char* ptr = prefixChunk.interpsGroup.ptr; | |
100 | - MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, originalForm, string(""), prefixChunk, ptr); | |
101 | - decodedForm += mi.getLemma(); | |
102 | - } | |
103 | - } | |
104 | - | |
105 | - MorphInterpretation decodeMorphInterpretation( | |
106 | - unsigned int startNode, unsigned int endNode, | |
107 | - const string& orth, | |
108 | - const string& lemmaPrefix, | |
109 | - const InterpretedChunk& chunk, | |
110 | - const unsigned char*& ptr) const { | |
111 | - string lemma = lemmaPrefix; | |
112 | - EncodedInterpretation ei = this->decodeInterp(ptr); | |
113 | - this->decodeForm(chunk.lowercaseCodepoints, ei.value, lemma); | |
114 | - return MorphInterpretation( | |
115 | - startNode, endNode, | |
116 | - orth, lemma, | |
117 | - ei.tag, | |
118 | - ei.nameClassifier, | |
119 | - env.getTagset(), | |
120 | - env.getCharsetConverter()); | |
121 | - } | |
122 | - | |
123 | - void decodeLemma(const unsigned char*& ptr, EncodedForm& lemma) const { | |
124 | - lemma.suffixToCut = *ptr; | |
112 | + void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const { | |
113 | + encodedForm.suffixToCut = *ptr; | |
125 | 114 | ptr++; |
126 | - lemma.suffixToAdd = (const char*) ptr; | |
115 | + encodedForm.suffixToAdd = (const char*) ptr; | |
127 | 116 | ptr += strlen((const char*) ptr) + 1; |
128 | - assert(lemma.casePattern.size() == 0); | |
117 | + assert(encodedForm.casePattern.size() == 0); | |
129 | 118 | // lemma.casePattern.resize(MAX_WORD_SIZE, false); |
130 | 119 | uint8_t casePatternType = *ptr; |
131 | 120 | ptr++; |
... | ... | @@ -139,7 +128,7 @@ private: |
139 | 128 | ptr++; |
140 | 129 | for (unsigned int i = 0; i < prefixLength; i++) { |
141 | 130 | // lemma.casePattern[i] = true; |
142 | - lemma.casePattern.push_back(true); | |
131 | + encodedForm.casePattern.push_back(true); | |
143 | 132 | } |
144 | 133 | // lemma.casePattern.resize(prefixLength, true); |
145 | 134 | break; |
... | ... | @@ -150,21 +139,40 @@ private: |
150 | 139 | uint8_t idx = *ptr; |
151 | 140 | ptr++; |
152 | 141 | // lemma.casePattern[idx] = true; |
153 | - lemma.casePattern.resize(idx + 1, false); | |
154 | - lemma.casePattern[idx] = true; | |
142 | + encodedForm.casePattern.resize(idx + 1, false); | |
143 | + encodedForm.casePattern[idx] = true; | |
155 | 144 | } |
156 | 145 | break; |
157 | 146 | } |
158 | 147 | } |
159 | - | |
160 | - EncodedInterpretation decodeInterp(const unsigned char*& ptr) const { | |
161 | - EncodedInterpretation interp; | |
162 | - decodeLemma(ptr, interp.value); | |
163 | - interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr))); | |
164 | - ptr += 2; | |
165 | - interp.nameClassifier = *ptr; | |
166 | - ptr++; | |
167 | - return interp; | |
148 | +private: | |
149 | + | |
150 | + MorphInterpretation decodeMorphInterpretation( | |
151 | + unsigned int startNode, unsigned int endNode, | |
152 | + const string& orth, | |
153 | + const string& lemmaPrefix, | |
154 | + const InterpretedChunk& chunk, | |
155 | + const unsigned char*& ptr) const { | |
156 | + string lemma = lemmaPrefix; | |
157 | + EncodedInterpretation ei = this->deserializeInterp(ptr); | |
158 | + this->decodeForm(chunk.lowercaseCodepoints, ei.value, lemma); | |
159 | + return MorphInterpretation( | |
160 | + startNode, endNode, | |
161 | + orth, lemma, | |
162 | + ei.tag, | |
163 | + ei.nameClassifier, | |
164 | + env.getTagset(), | |
165 | + env.getCharsetConverter()); | |
166 | + } | |
167 | + | |
168 | + void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orth, std::string& lemmaPrefix) const { | |
169 | + for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | |
170 | + const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | |
171 | + orth += env.getCharsetConverter().toString(prefixChunk.originalCodepoints); | |
172 | + const unsigned char* ptr = prefixChunk.interpsGroup.ptr; | |
173 | + MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orth, string(""), prefixChunk, ptr); | |
174 | + lemmaPrefix += mi.getLemma(); | |
175 | + } | |
168 | 176 | } |
169 | 177 | }; |
170 | 178 | |
... | ... | @@ -173,35 +181,51 @@ public: |
173 | 181 | |
174 | 182 | InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) { |
175 | 183 | } |
176 | - | |
184 | + | |
177 | 185 | void decode( |
178 | 186 | unsigned int startNode, |
179 | 187 | unsigned int endNode, |
180 | 188 | const InterpretedChunk& interpretedChunk, |
181 | 189 | std::vector<MorphInterpretation>& out) const { |
182 | - // string orth; | |
183 | - // string lemma; | |
184 | - // convertPrefixes(interpretedChunk, lemma, orth); | |
185 | - // size_t orthLength = orth.length(); | |
186 | - // lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | |
187 | - // for (unsigned int i = 0; i < interpretedChunk.interpsGroup.interps.size(); i++) { | |
188 | - // const EncodedInterpretation& ei = interpretedChunk.interpsGroup.interps[i]; | |
189 | - // decodeForm( | |
190 | - // interpretedChunk.originalCodepoints, | |
191 | - // ei.value, | |
192 | - // orth); | |
193 | - // out.push_back(MorphInterpretation( | |
194 | - // startNode, endNode, | |
195 | - // orth, lemma, | |
196 | - // ei.tag, | |
197 | - // ei.nameClassifier, | |
198 | - // env.getTagset(), | |
199 | - // env.getCharsetConverter())); | |
200 | - // orth.erase(orthLength); | |
201 | - // } | |
190 | + string orthPrefix; | |
191 | + string lemma; | |
192 | + convertPrefixes(interpretedChunk, orthPrefix, lemma); | |
193 | + lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | |
194 | + const unsigned char* currPtr = interpretedChunk.interpsGroup.ptr; | |
195 | + while (currPtr - interpretedChunk.interpsGroup.ptr < interpretedChunk.interpsGroup.size) { | |
196 | + out.push_back(this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr)); | |
197 | + } | |
202 | 198 | } |
203 | 199 | |
204 | 200 | private: |
201 | + | |
202 | + void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const { | |
203 | + for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | |
204 | + const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | |
205 | + lemma += env.getCharsetConverter().toString(prefixChunk.originalCodepoints); | |
206 | + const unsigned char* ptr = prefixChunk.interpsGroup.ptr; | |
207 | + MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr); | |
208 | + orthPrefix += mi.getOrth(); | |
209 | + } | |
210 | + } | |
211 | + | |
212 | + MorphInterpretation decodeMorphInterpretation( | |
213 | + unsigned int startNode, unsigned int endNode, | |
214 | + const string& orthPrefix, | |
215 | + const string& lemma, | |
216 | + const InterpretedChunk& chunk, | |
217 | + const unsigned char*& ptr) const { | |
218 | + string orth = orthPrefix; | |
219 | + EncodedInterpretation ei = this->deserializeInterp(ptr); | |
220 | + this->decodeForm(chunk.originalCodepoints, ei.value, orth); | |
221 | + return MorphInterpretation( | |
222 | + startNode, endNode, | |
223 | + orth, lemma, | |
224 | + ei.tag, | |
225 | + ei.nameClassifier, | |
226 | + env.getTagset(), | |
227 | + env.getCharsetConverter()); | |
228 | + } | |
205 | 229 | |
206 | 230 | void decodeForm( |
207 | 231 | const vector<uint32_t>& lemma, |
... | ... | @@ -218,6 +242,15 @@ private: |
218 | 242 | env.getCharsetConverter().append(cp, res); |
219 | 243 | } |
220 | 244 | } |
245 | + | |
246 | + void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const { | |
247 | + encodedForm.prefixToAdd = (const char*) ptr; | |
248 | + ptr += strlen((const char*) ptr) + 1; | |
249 | + encodedForm.suffixToCut = *ptr; | |
250 | + ptr++; | |
251 | + encodedForm.suffixToAdd = (const char*) ptr; | |
252 | + ptr += strlen((const char*) ptr) + 1; | |
253 | + } | |
221 | 254 | }; |
222 | 255 | |
223 | 256 | #endif /* INTERPSGROUPDECODER_HPP */ |
... | ... |
morfeusz/Morfeusz.cpp
... | ... | @@ -40,32 +40,13 @@ options(createDefaultOptions()) { |
40 | 40 | |
41 | 41 | void Morfeusz::setAnalyzerFile(const string& filename) { |
42 | 42 | this->analyzerEnv.setFSAFile(filename); |
43 | - // if (this->isAnalyzerFSAFromFile) { | |
44 | - // delete this->analyzerFSA; | |
45 | - // deleteSegrulesFSAs(this->analyzerSegrulesFSAsMap); | |
46 | - // delete this->analyzerPtr; | |
47 | - // } | |
48 | - // this->analyzerPtr = readFile<unsigned char>(filename.c_str()); | |
49 | - // this->analyzerFSA = FSA< vector<InterpsGroup> > ::getFSA(analyzerPtr, *initializeAnalyzerDeserializer()); | |
50 | - // this->analyzerSegrulesFSAsMap = createSegrulesFSAsMap(analyzerPtr); | |
51 | - // this->isAnalyzerFSAFromFile = true; | |
52 | 43 | } |
53 | 44 | |
54 | 45 | void Morfeusz::setGeneratorFile(const string& filename) { |
55 | 46 | this->generatorEnv.setFSAFile(filename); |
56 | - // if (this->isGeneratorFSAFromFile) { | |
57 | - // delete this->generatorPtr; | |
58 | - // } | |
59 | - // this->generatorPtr = readFile<unsigned char>(filename.c_str()); | |
60 | - // this->generator.setGeneratorPtr(generatorPtr); | |
61 | 47 | } |
62 | 48 | |
63 | 49 | Morfeusz::~Morfeusz() { |
64 | - // if (this->isAnalyzerFSAFromFile) { | |
65 | - // delete this->analyzerFSA; | |
66 | - // deleteSegrulesFSAs(this->analyzerSegrulesFSAsMap); | |
67 | - // delete this->analyzerPtr; | |
68 | - // } | |
69 | 50 | } |
70 | 51 | |
71 | 52 | void Morfeusz::processOneWord( |
... | ... | @@ -97,7 +78,6 @@ void Morfeusz::processOneWord( |
97 | 78 | } |
98 | 79 | srcNode++; |
99 | 80 | } |
100 | - // graph.getResults(*this->tagset, results); | |
101 | 81 | } |
102 | 82 | else if (inputStart != inputEnd) { |
103 | 83 | this->appendIgnotiumToResults(env, string(inputStart, currInput), startNodeNum, results); |
... | ... | @@ -140,6 +120,7 @@ void Morfeusz::doProcessOneWord( |
140 | 120 | vector<InterpsGroup> val(state.getValue()); |
141 | 121 | for (unsigned int i = 0; i < val.size(); i++) { |
142 | 122 | InterpsGroup& ig = val[i]; |
123 | +// cerr << "accept at '" << currInput << "' type=" << (int) ig.type << endl; | |
143 | 124 | set<SegrulesState> newSegrulesStates; |
144 | 125 | env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates); |
145 | 126 | for ( |
... | ... | @@ -147,9 +128,6 @@ void Morfeusz::doProcessOneWord( |
147 | 128 | it != newSegrulesStates.end(); |
148 | 129 | ++it) { |
149 | 130 | SegrulesState newSegrulesState = *it; |
150 | -// if (newSegrulesState.shiftOrthFromPrevious) { | |
151 | -// | |
152 | -// } | |
153 | 131 | InterpretedChunk ic = { |
154 | 132 | inputData, |
155 | 133 | originalCodepoints, |
... | ... | @@ -160,12 +138,19 @@ void Morfeusz::doProcessOneWord( |
160 | 138 | vector<InterpretedChunk>() |
161 | 139 | }; |
162 | 140 | if (!accum.empty() && accum.back().shiftOrth) { |
141 | +// cerr << "shift orth from " << (int) accum.back().interpsGroup.type << " to " << (int) ig.type << endl; | |
163 | 142 | doShiftOrth(accum.back(), ic); |
164 | 143 | } |
165 | 144 | accum.push_back(ic); |
166 | 145 | if (isEndOfWord(codepoint)) { |
167 | - if (newSegrulesState.accepting) | |
146 | +// cerr << "end of word" << endl; | |
147 | + if (newSegrulesState.accepting) { | |
148 | +// cerr << "accept " << (int) ig.type << endl; | |
168 | 149 | graph.addPath(accum); |
150 | + } | |
151 | + else { | |
152 | +// cerr << "not accept " << (int) ig.type << endl; | |
153 | + } | |
169 | 154 | } |
170 | 155 | else { |
171 | 156 | const char* newCurrInput = currInput; |
... | ... | @@ -190,8 +175,6 @@ void Morfeusz::appendIgnotiumToResults( |
190 | 175 | } |
191 | 176 | |
192 | 177 | ResultsIterator Morfeusz::analyze(const string& text) const { |
193 | - // const char* textStart = text.c_str(); | |
194 | - // const char* textEnd = text.c_str() + text.length(); | |
195 | 178 | vector<MorphInterpretation> res; |
196 | 179 | this->analyze(text, res); |
197 | 180 | return ResultsIterator(res); |
... | ... | @@ -207,29 +190,54 @@ void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) |
207 | 190 | } |
208 | 191 | |
209 | 192 | ResultsIterator Morfeusz::generate(const string& text) const { |
210 | - // const char* textStart = text.c_str(); | |
211 | - // const char* textEnd = text.c_str() + text.length(); | |
212 | 193 | vector<MorphInterpretation> res; |
213 | 194 | this->generate(text, res); |
214 | 195 | return ResultsIterator(res); |
215 | 196 | } |
216 | 197 | |
217 | -void Morfeusz::generate(const string& text, vector<MorphInterpretation>& results) const { | |
218 | - const char* input = text.c_str(); | |
219 | - const char* inputEnd = input + text.length(); | |
198 | +ResultsIterator Morfeusz::generate(const string& text, int tagnum) const { | |
199 | + vector<MorphInterpretation> res; | |
200 | + this->generate(text, tagnum, res); | |
201 | + return ResultsIterator(res); | |
202 | +} | |
203 | + | |
204 | +void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& results) const { | |
205 | + const char* input = lemma.c_str(); | |
206 | + const char* inputEnd = input + lemma.length(); | |
220 | 207 | while (input != inputEnd) { |
221 | 208 | int startNode = results.empty() ? 0 : results.back().getEndNode(); |
222 | 209 | this->processOneWord(this->generatorEnv, input, inputEnd, startNode, results); |
223 | 210 | } |
224 | 211 | } |
225 | 212 | |
213 | +// XXX - someday it should be improved | |
214 | +void Morfeusz::generate(const std::string& lemma, int tagnum, vector<MorphInterpretation>& result) const { | |
215 | + vector<MorphInterpretation> partRes; | |
216 | + this->generate(lemma, partRes); | |
217 | + for (unsigned int i = 0; i < partRes.size(); i++) { | |
218 | + if (partRes[i].getTagnum() == tagnum) { | |
219 | + result.push_back(partRes[i]); | |
220 | + } | |
221 | + } | |
222 | +} | |
223 | + | |
226 | 224 | void Morfeusz::setCharset(MorfeuszCharset charset) { |
227 | 225 | this->options.encoding = charset; |
228 | 226 | this->analyzerEnv.setCharset(charset); |
229 | 227 | this->generatorEnv.setCharset(charset); |
230 | 228 | } |
231 | 229 | |
232 | -ResultsIterator::ResultsIterator(vector<MorphInterpretation>& res) { | |
230 | +void Morfeusz::setAggl(const std::string& aggl) { | |
231 | + this->analyzerEnv.setSegrulesOption("aggl", aggl); | |
232 | + this->generatorEnv.setSegrulesOption("aggl", aggl); | |
233 | +} | |
234 | + | |
235 | +void Morfeusz::setPraet(const std::string& praet) { | |
236 | + this->analyzerEnv.setSegrulesOption("praet", praet); | |
237 | + this->generatorEnv.setSegrulesOption("praet", praet); | |
238 | +} | |
239 | + | |
240 | +ResultsIterator::ResultsIterator(const vector<MorphInterpretation>& res) { | |
233 | 241 | resultsBuffer.insert(resultsBuffer.begin(), res.begin(), res.end()); |
234 | 242 | } |
235 | 243 | |
... | ... |
morfeusz/Morfeusz.hpp
... | ... | @@ -35,6 +35,12 @@ class ResultsIterator; |
35 | 35 | |
36 | 36 | typedef State< std::vector<InterpsGroup > > StateType; |
37 | 37 | |
38 | +/** | |
39 | + * Performs morphological analysis (analyze methods) and synthesis (generate methods). | |
40 | + * | |
41 | + * It is NOT thread-safe | |
42 | + * but it is possible to use separate Morfeusz instance for each concurrent thread. | |
43 | + */ | |
38 | 44 | class Morfeusz { |
39 | 45 | public: |
40 | 46 | |
... | ... | @@ -57,6 +63,9 @@ public: |
57 | 63 | */ |
58 | 64 | void setGeneratorFile(const std::string& filename); |
59 | 65 | |
66 | + /** | |
67 | + * Destroys Morfeusz object. | |
68 | + */ | |
60 | 69 | virtual ~Morfeusz(); |
61 | 70 | |
62 | 71 | /** |
... | ... | @@ -82,6 +91,16 @@ public: |
82 | 91 | * @return - iterator over morphological analysis results |
83 | 92 | */ |
84 | 93 | ResultsIterator generate(const std::string& lemma) const; |
94 | + | |
95 | + /** | |
96 | + * Perform morphological synthesis on a given lemma and return the results as iterator. | |
97 | + * Limit results to interpretations with the specified tag. | |
98 | + * | |
99 | + * @param lemma - lemma for morphological synthesis | |
100 | + * @param tagnum - tag number that result interpretations must have | |
101 | + * @return - iterator over morphological synthesis results | |
102 | + */ | |
103 | + ResultsIterator generate(const std::string& lemma, int tagnum) const; | |
85 | 104 | |
86 | 105 | /** |
87 | 106 | * Perform morphological synthesis on a given lemma and put results in a vector. |
... | ... | @@ -90,6 +109,16 @@ public: |
90 | 109 | * @param result - results vector |
91 | 110 | */ |
92 | 111 | void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const; |
112 | + | |
113 | + /** | |
114 | + * Perform morphological synthesis on a given lemma and put results in a vector. | |
115 | + * Limit results to interpretations with the specified tag. | |
116 | + * | |
117 | + * @param lemma - lemma to be synthesized | |
118 | + * @param tagnum - tag number that result interpretations must have | |
119 | + * @param result - results vector | |
120 | + */ | |
121 | + void generate(const std::string& lemma, int tagnum, std::vector<MorphInterpretation>& result) const; | |
93 | 122 | |
94 | 123 | /** |
95 | 124 | * Set encoding for input and output string objects. |
... | ... | @@ -97,6 +126,20 @@ public: |
97 | 126 | * @param encoding |
98 | 127 | */ |
99 | 128 | void setCharset(MorfeuszCharset encoding); |
129 | + | |
130 | + /** | |
131 | + * Set aggl segmentation option value. | |
132 | + * | |
133 | + * @param aggl | |
134 | + */ | |
135 | + void setAggl(const std::string& aggl); | |
136 | + | |
137 | + /** | |
138 | + * Set praet segmentation option value. | |
139 | + * | |
140 | + * @param praet | |
141 | + */ | |
142 | + void setPraet(const std::string& praet); | |
100 | 143 | |
101 | 144 | friend class ResultsIterator; |
102 | 145 | private: |
... | ... | @@ -121,19 +164,9 @@ private: |
121 | 164 | const std::string& word, |
122 | 165 | int startNodeNum, |
123 | 166 | std::vector<MorphInterpretation>& results) const; |
167 | + | |
124 | 168 | Environment analyzerEnv; |
125 | 169 | Environment generatorEnv; |
126 | -// const unsigned char* analyzerPtr; | |
127 | -// FSAType* analyzerFSA; | |
128 | -// std::map<SegrulesOptions, SegrulesFSA*> analyzerSegrulesFSAsMap; | |
129 | -// SegrulesFSA* currAnalyzerSegrulesFSA; | |
130 | -// bool isAnalyzerFSAFromFile; | |
131 | -// | |
132 | -// const unsigned char* generatorPtr; | |
133 | -// FSAType* generatorFSA; | |
134 | -// bool isGeneratorFSAFromFile; | |
135 | -// Generator generator; | |
136 | - | |
137 | 170 | MorfeuszOptions options; |
138 | 171 | }; |
139 | 172 | |
... | ... | @@ -143,7 +176,7 @@ public: |
143 | 176 | bool hasNext(); |
144 | 177 | friend class Morfeusz; |
145 | 178 | private: |
146 | - ResultsIterator(vector<MorphInterpretation>& res); | |
179 | + ResultsIterator(const std::vector<MorphInterpretation>& res); | |
147 | 180 | const char* rawInput; |
148 | 181 | std::list<MorphInterpretation> resultsBuffer; |
149 | 182 | int startNode; |
... | ... |
morfeusz/cli/cli.hpp
0 → 100644
1 | +/* | |
2 | + * File: cli.hpp | |
3 | + * Author: mlenart | |
4 | + * | |
5 | + * Created on 17 marzec 2014, 18:32 | |
6 | + */ | |
7 | + | |
8 | +#ifndef CLI_HPP | |
9 | +#define CLI_HPP | |
10 | + | |
11 | +#ifdef _WIN64 | |
12 | +#define TMPDUPA_IN IN | |
13 | +#define IN IN | |
14 | +#else | |
15 | +#ifdef _WIN32 | |
16 | +#define TMPDUPA_IN IN | |
17 | +#define IN IN | |
18 | +#endif | |
19 | +#endif | |
20 | + | |
21 | +#include <iostream> | |
22 | + | |
23 | +#pragma GCC diagnostic push | |
24 | +#pragma GCC diagnostic ignored "-Wsign-compare" | |
25 | +#pragma GCC diagnostic ignored "-Wpedantic" | |
26 | +#pragma GCC diagnostic ignored "-Wunused-variable" | |
27 | +#pragma GCC diagnostic ignored "-Wconversion" | |
28 | +#pragma GCC diagnostic ignored "-Wreorder" | |
29 | +#pragma GCC diagnostic ignored "-Wlong-long" | |
30 | +#pragma GCC diagnostic ignored "-Wunused-function" | |
31 | +#pragma GCC diagnostic ignored "-Wcast-qual" | |
32 | +#pragma GCC diagnostic ignored "-Wparentheses" | |
33 | +#pragma GCC diagnostic ignored "-Wformat-extra-args" | |
34 | + | |
35 | +#include "ezOptionParser.hpp" | |
36 | + | |
37 | +#pragma GCC diagnostic pop | |
38 | + | |
39 | +void printCLIUsage(ez::ezOptionParser& opt, std::ostream& out) { | |
40 | + std::string usage; | |
41 | + opt.getUsage(usage); | |
42 | + out << usage; | |
43 | +} | |
44 | + | |
45 | +#ifdef _WIN64 | |
46 | +#define IN TMPDUPA_IN | |
47 | +#else | |
48 | +#ifdef _WIN32 | |
49 | +#define IN TMPDUPA_IN | |
50 | +#endif | |
51 | +#endif | |
52 | + | |
53 | +#endif /* CLI_HPP */ | |
54 | + | |
... | ... |
morfeusz/cli/ezOptionParser.hpp
0 → 100644
1 | +/* | |
2 | +This file is part of ezOptionParser. See MIT-LICENSE. | |
3 | + | |
4 | +Copyright (C) 2011,2012 Remik Ziemlinski <first d0t surname att gmail> | |
5 | + | |
6 | +CHANGELOG | |
7 | + | |
8 | +v0.0.0 20110505 rsz Created. | |
9 | +v0.1.0 20111006 rsz Added validator. | |
10 | +v0.1.1 20111012 rsz Fixed validation of ulonglong. | |
11 | +v0.1.2 20111126 rsz Allow flag names start with alphanumeric (previously, flag had to start with alpha). | |
12 | +v0.1.3 20120108 rsz Created work-around for unique id generation with IDGenerator that avoids retarded c++ translation unit linker errors with single-header static variables. Forced inline on all methods to please retard compiler and avoid multiple def errors. | |
13 | +v0.1.4 20120629 Enforced MIT license on all files. | |
14 | +v0.2.0 20121120 Added parseIndex to OptionGroup. | |
15 | +v0.2.1 20130506 Allow disabling doublespace of OPTIONS usage descriptions. | |
16 | +*/ | |
17 | +#ifndef EZ_OPTION_PARSER_H | |
18 | +#define EZ_OPTION_PARSER_H | |
19 | + | |
20 | +#include <stdlib.h> | |
21 | +#include <vector> | |
22 | +#include <list> | |
23 | +#include <map> | |
24 | +#include <string> | |
25 | +#include <iostream> | |
26 | +#include <fstream> | |
27 | +#include <algorithm> | |
28 | +#include <limits> | |
29 | +#include <sstream> | |
30 | +#include <cstring> | |
31 | + | |
32 | +namespace ez { | |
33 | +#define DEBUGLINE() printf("%s:%d\n", __FILE__, __LINE__); | |
34 | + | |
35 | +/* ################################################################### */ | |
/*
Converts the text of *s to T using stream extraction.
The result is value-initialized first, so a failed extraction returns T()
instead of an indeterminate value on pre-C++11 standard libraries.
*/
template<typename T>
static T fromString(const std::string* s) {
    std::istringstream stream (s->c_str());
    T t = T();
    stream >> t;
    return t;
}
/*
Converts the C string s to T using stream extraction.
The result is value-initialized first, so a failed extraction returns T()
instead of an indeterminate value on pre-C++11 standard libraries.
*/
template<typename T>
static T fromString(const char* s) {
    std::istringstream stream (s);
    T t = T();
    stream >> t;
    return t;
}
50 | +/* ################################################################### */ | |
/* True when every character of s from index i onward is a decimal digit (also true when that range is empty). */
static bool isdigit(const std::string & s, int i=0) {
    const std::string::size_type firstOther =
        s.find_first_not_of("0123456789", static_cast<std::string::size_type>(i));
    return firstOther == std::string::npos;
}
63 | +/* ################################################################### */ | |
/* Pointer variant: true when every character of *s from index i onward is a decimal digit. */
static bool isdigit(const std::string * s, int i=0) {
    const int n = s->length();
    while (i < n) {
        const char ch = s->at(i);
        if (ch < '0' || ch > '9')
            return false;
        ++i;
    }

    return true;
}
76 | +/* ################################################################### */ | |
/*
Compare strings for opts, so short opt flags come before long format flags.
For example, -d < --dimension < --dmn, and also lower come before upper. The default STL std::string compare doesn't do that.

Returns true when *s1 sorts before *s2.
Ordering rules, in sequence:
 1. fewer leading non-alphanumeric symbols first;
 2. then by the first alphanumeric character, ignoring case;
 3. then lower case before upper case;
 4. then plain std::string comparison.
A string with no alphanumeric character at all falls back to rule 4 instead
of indexing past its end.
*/
static bool CmpOptStringPtr(std::string * s1, std::string * s2) {
    // <cctype> functions need a value representable as unsigned char.
    std::string::size_type c1 = 0;
    while (c1 < s1->size() && !isalnum(static_cast<unsigned char>((*s1)[c1])))
        ++c1;

    std::string::size_type c2 = 0;
    while (c2 < s2->size() && !isalnum(static_cast<unsigned char>((*s2)[c2])))
        ++c2;

    // Test which has more symbols before its name.
    if (c1 != c2)
        return c1 < c2;

    // Nothing but symbols in at least one of them: no first letter to compare.
    if (c1 == s1->size() || c2 == s2->size())
        return (s1->compare(*s2)<0);

    // Both have same number of symbols, so compare first letter.
    const unsigned char char1 = static_cast<unsigned char>((*s1)[c1]);
    const unsigned char char2 = static_cast<unsigned char>((*s2)[c2]);
    const int lo1 = tolower(char1);
    const int lo2 = tolower(char2);

    if (lo1 != lo2)
        return lo1 < lo2;

    // Same letter; if the case differs, lower case goes first.
    // isupper() returns an arbitrary non-zero int, so compare against 0
    // rather than narrowing the result to char.
    const bool up1 = isupper(char1) != 0;
    const bool up2 = isupper(char2) != 0;

    if (up1 != up2)
        return up2;

    return (s1->compare(*s2)<0);
}
119 | +/* ################################################################### */ | |
/*
Makes a vector of strings from one string,
splitting at (and excluding) delimiter "token".
Every piece is allocated with new and appended to *result; the caller owns them.
Empty pieces between adjacent delimiters are kept; a trailing delimiter adds no empty piece.
*/
static void SplitDelim( const std::string& s, const char token, std::vector<std::string*> * result) {
    std::string::size_type pieceStart = 0;

    while (pieceStart < s.size()) {
        const std::string::size_type hit = s.find(token, pieceStart);
        if (hit == std::string::npos) {
            // Last piece: everything that is left.
            result->push_back(new std::string(s, pieceStart));
            break;
        }
        result->push_back(new std::string(s, pieceStart, hit - pieceStart));
        pieceStart = hit + 1;
    }
}
137 | +/* ################################################################### */ | |
// Variant that uses deep copies and references instead of pointers (less efficient).
// Splits s at every "token"; empty pieces between adjacent delimiters are kept,
// a trailing delimiter adds no empty piece.
static void SplitDelim( const std::string& s, const char token, std::vector<std::string> & result) {
    std::string::size_type pieceStart = 0;

    while (pieceStart < s.size()) {
        const std::string::size_type hit = s.find(token, pieceStart);
        if (hit == std::string::npos) {
            // Last piece: everything that is left.
            result.push_back(s.substr(pieceStart));
            break;
        }
        result.push_back(s.substr(pieceStart, hit - pieceStart));
        pieceStart = hit + 1;
    }
}
152 | +/* ################################################################### */ | |
// Variant that uses list instead of vector for efficient insertion, etc.
// Every piece is allocated with new and appended to result; the caller owns them.
static void SplitDelim( const std::string& s, const char token, std::list<std::string*> & result) {
    std::string::size_type pieceStart = 0;

    while (pieceStart < s.size()) {
        const std::string::size_type hit = s.find(token, pieceStart);
        if (hit == std::string::npos) {
            // Last piece: everything that is left.
            result.push_back(new std::string(s, pieceStart));
            break;
        }
        result.push_back(new std::string(s, pieceStart, hit - pieceStart));
        pieceStart = hit + 1;
    }
}
167 | +/* ################################################################### */ | |
/* Parses the first n strings with atoi() and stores each value, narrowed to unsigned char, in out. */
static void ToU1(std::string ** strings, unsigned char * out, int n) {
    int idx = 0;
    while (idx < n) {
        const int parsed = atoi(strings[idx]->c_str());
        out[idx] = static_cast<unsigned char>(parsed);
        ++idx;
    }
}
173 | +/* ################################################################### */ | |
/* Parses the first n strings with atoi() and stores each value, narrowed to char, in out. */
static void ToS1(std::string ** strings, char * out, int n) {
    int idx = 0;
    while (idx < n) {
        const int parsed = atoi(strings[idx]->c_str());
        out[idx] = static_cast<char>(parsed);
        ++idx;
    }
}
179 | +/* ################################################################### */ | |
/* Parses the first n strings with atoi() and stores each value, narrowed to unsigned short, in out. */
static void ToU2(std::string ** strings, unsigned short * out, int n) {
    int idx = 0;
    while (idx < n) {
        const int parsed = atoi(strings[idx]->c_str());
        out[idx] = static_cast<unsigned short>(parsed);
        ++idx;
    }
}
185 | +/* ################################################################### */ | |
/* Parses the first n strings with atoi() and stores each value, narrowed to short, in out. */
static void ToS2(std::string ** strings, short * out, int n) {
    int idx = 0;
    while (idx < n) {
        const int parsed = atoi(strings[idx]->c_str());
        out[idx] = static_cast<short>(parsed);
        ++idx;
    }
}
191 | +/* ################################################################### */ | |
/* Parses the first n strings with atoi() and stores the values in out. */
static void ToS4(std::string ** strings, int * out, int n) {
    int idx = 0;
    while (idx < n) {
        const char* text = strings[idx]->c_str();
        out[idx] = atoi(text);
        ++idx;
    }
}
197 | +/* ################################################################### */ | |
/* Parses the first n strings with strtoul() (base auto-detected) and stores the values, narrowed to unsigned int, in out. */
static void ToU4(std::string ** strings, unsigned int * out, int n) {
    int idx = 0;
    while (idx < n) {
        const unsigned long parsed = strtoul(strings[idx]->c_str(), NULL, 0);
        out[idx] = static_cast<unsigned int>(parsed);
        ++idx;
    }
}
203 | +/* ################################################################### */ | |
/* Extracts a long long from each of the first n strings via a string stream into out. */
static void ToS8(std::string ** strings, long long * out, int n) {
    int idx = 0;
    while (idx < n) {
        std::stringstream reader(strings[idx]->c_str());
        reader >> out[idx];
        ++idx;
    }
}
210 | +/* ################################################################### */ | |
/* Extracts an unsigned long long from each of the first n strings via a string stream into out. */
static void ToU8(std::string ** strings, unsigned long long * out, int n) {
    int idx = 0;
    while (idx < n) {
        std::stringstream reader(strings[idx]->c_str());
        reader >> out[idx];
        ++idx;
    }
}
217 | +/* ################################################################### */ | |
/* Parses the first n strings with atof() and stores the values, narrowed to float, in out. */
static void ToF(std::string ** strings, float * out, int n) {
    int idx = 0;
    while (idx < n) {
        const double parsed = atof(strings[idx]->c_str());
        out[idx] = static_cast<float>(parsed);
        ++idx;
    }
}
223 | +/* ################################################################### */ | |
/* Parses the first n strings with atof() and stores the values in out. */
static void ToD(std::string ** strings, double * out, int n) {
    int idx = 0;
    while (idx < n) {
        const char* text = strings[idx]->c_str();
        out[idx] = atof(text);
        ++idx;
    }
}
229 | +/* ################################################################### */ | |
/* Appends atoi() of every string, in order, to out. */
static void StringsToInts(std::vector<std::string> & strings, std::vector<int> & out) {
    std::vector<std::string>::const_iterator it = strings.begin();
    for (; it != strings.end(); ++it) {
        out.push_back(atoi(it->c_str()));
    }
}
235 | +/* ################################################################### */ | |
/* Pointer variant: appends atoi() of every pointed-to string, in order, to *out. */
static void StringsToInts(std::vector<std::string*> * strings, std::vector<int> * out) {
    std::vector<std::string*>::const_iterator it = strings->begin();
    for (; it != strings->end(); ++it) {
        out->push_back(atoi((*it)->c_str()));
    }
}
241 | +/* ################################################################### */ | |
/* Appends atol() of every string, in order, to out. */
static void StringsToLongs(std::vector<std::string> & strings, std::vector<long> & out) {
    std::vector<std::string>::const_iterator it = strings.begin();
    for (; it != strings.end(); ++it) {
        out.push_back(atol(it->c_str()));
    }
}
247 | +/* ################################################################### */ | |
/* Pointer variant: appends atol() of every pointed-to string, in order, to *out. */
static void StringsToLongs(std::vector<std::string*> * strings, std::vector<long> * out) {
    std::vector<std::string*>::const_iterator it = strings->begin();
    for (; it != strings->end(); ++it) {
        out->push_back(atol((*it)->c_str()));
    }
}
253 | +/* ################################################################### */ | |
/* Appends strtoul() (base auto-detected) of every string, in order, to out. */
static void StringsToULongs(std::vector<std::string> & strings, std::vector<unsigned long> & out) {
    std::vector<std::string>::const_iterator it = strings.begin();
    for (; it != strings.end(); ++it) {
        out.push_back(strtoul(it->c_str(), 0, 0));
    }
}
259 | +/* ################################################################### */ | |
/* Pointer variant: appends strtoul() (base auto-detected) of every pointed-to string, in order, to *out. */
static void StringsToULongs(std::vector<std::string*> * strings, std::vector<unsigned long> * out) {
    std::vector<std::string*>::const_iterator it = strings->begin();
    for (; it != strings->end(); ++it) {
        out->push_back(strtoul((*it)->c_str(), 0, 0));
    }
}
265 | +/* ################################################################### */ | |
/* Appends atof() of every string, in order, to out (stored as float). */
static void StringsToFloats(std::vector<std::string> & strings, std::vector<float> & out) {
    std::vector<std::string>::const_iterator it = strings.begin();
    for (; it != strings.end(); ++it) {
        out.push_back(atof(it->c_str()));
    }
}
271 | +/* ################################################################### */ | |
/* Pointer variant: appends atof() of every pointed-to string, in order, to *out (stored as float). */
static void StringsToFloats(std::vector<std::string*> * strings, std::vector<float> * out) {
    std::vector<std::string*>::const_iterator it = strings->begin();
    for (; it != strings->end(); ++it) {
        out->push_back(atof((*it)->c_str()));
    }
}
277 | +/* ################################################################### */ | |
/* Appends atof() of every string, in order, to out. */
static void StringsToDoubles(std::vector<std::string> & strings, std::vector<double> & out) {
    std::vector<std::string>::const_iterator it = strings.begin();
    for (; it != strings.end(); ++it) {
        out.push_back(atof(it->c_str()));
    }
}
283 | +/* ################################################################### */ | |
/* Pointer variant: appends atof() of every pointed-to string, in order, to *out. */
static void StringsToDoubles(std::vector<std::string*> * strings, std::vector<double> * out) {
    std::vector<std::string*>::const_iterator it = strings->begin();
    for (; it != strings->end(); ++it) {
        out->push_back(atof((*it)->c_str()));
    }
}
289 | +/* ################################################################### */ | |
/* Appends a copy of every pointed-to string, in order, to *out. */
static void StringsToStrings(std::vector<std::string*> * strings, std::vector<std::string> * out) {
    std::vector<std::string*>::const_iterator it = strings->begin();
    for (; it != strings->end(); ++it) {
        out->push_back(**it);
    }
}
295 | +/* ################################################################### */ | |
/* Lower-cases the characters 'A'..'Z' of s in place; every other character is left untouched. */
static void ToLowerASCII(std::string & s) {
    for (std::string::iterator it = s.begin(); it != s.end(); ++it) {
        const char ch = *it;
        if (ch >= 'A' && ch <= 'Z')
            *it = ch + 32;
    }
}
306 | +/* ################################################################### */ | |
/*
Splits a command line into an argv-style array.

Arguments are separated by space, tab, CR or LF. A single or double quote
opens a quoted section in which separators are kept; the section is closed
by the next quote character of either kind. Quote characters themselves are
dropped.

The pointer table and the argument text live in ONE malloc'ed block:
release everything with a single free() on the returned pointer.

CmdLine: NUL-terminated command line (not modified).
_argc:   receives the number of arguments found.
Returns the NULL-terminated argument array, or NULL (with *_argc set to 0)
when the allocation fails.
*/
static char** CommandLineToArgvA(char* CmdLine, int* _argc) {
    const unsigned long len = strlen(CmdLine);

    // Bytes reserved for the pointer table: at most one argument per two
    // input characters, plus the terminating NULL entry.
    const unsigned long tableBytes = ((len+2)/2)*sizeof(void*) + sizeof(void*);

    char** argv = (char**)malloc(tableBytes + (len+2)*sizeof(char));
    if (argv == NULL) {
        (*_argc) = 0;
        return NULL;
    }

    // Argument text is stored right behind the pointer table.
    char* _argv = (char*)(((unsigned char*)argv)+tableBytes);

    unsigned long argc = 0;
    argv[argc] = _argv;

    bool in_QM = false;     // inside a quoted section
    bool in_TEXT = false;   // inside an argument
    bool in_SPACE = true;   // between arguments

    unsigned long j = 0;    // write position in _argv

    for (unsigned long i = 0; CmdLine[i] != '\0'; ++i) {
        const char a = CmdLine[i];

        if (in_QM) {
            if ((a == '\"') || (a == '\'')) { // rsz. Added single quote.
                in_QM = false;
            } else {
                _argv[j] = a;
                j++;
            }
            continue;
        }

        switch(a) {
        case '\"':
        case '\'': // rsz. Added single quote.
            in_QM = true;
            in_TEXT = true;
            if (in_SPACE) {
                argv[argc] = _argv+j;
                argc++;
            }
            in_SPACE = false;
            break;
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            if (in_TEXT) {
                // Terminate the argument that just ended.
                _argv[j] = '\0';
                j++;
            }
            in_TEXT = false;
            in_SPACE = true;
            break;
        default:
            in_TEXT = true;
            if (in_SPACE) {
                argv[argc] = _argv+j;
                argc++;
            }
            _argv[j] = a;
            j++;
            in_SPACE = false;
            break;
        }
    }
    _argv[j] = '\0';
    argv[argc] = NULL;

    (*_argc) = argc;
    return argv;
}
387 | +/* ################################################################### */ | |
// Create unique ids with static and still allow single header that avoids multiple definitions linker error.
// The single instance lives in a function-local static; ids start at 0 and grow by one per call to next().
class ezOptionParserIDGenerator {
public:
    static ezOptionParserIDGenerator& instance () {
        static ezOptionParserIDGenerator theGenerator;
        return theGenerator;
    }

    short next () {
        ++_id;
        return _id;
    }

private:
    ezOptionParserIDGenerator() : _id(-1) {}

    short _id;
};
397 | +/* ################################################################### */ | |
/* Validate a value by checking:
- if as string, see if converted value is within datatype's limits,
- and see if falls within a desired range,
- or see if within set of given list of values.

If comparing with a range, the values list must contain one or two values. One value is required when comparing with <, <=, >, >=. Use two values when requiring a test such as <x<, <=x<, <x<=, <=x<=.
A regcomp/regexec based class could be created in the future if a need arises.

All members are public. The reference values are held in an anonymous union
of typed array pointers; reset() (called by the destructor) frees the array
selected by `type`.
*/
class ezOptionValidator {
public:
    // Constructors: one taking textual type/operator/list descriptions, one
    // taking only a type code, and one per element type taking a typed list
    // together with its length.
    inline ezOptionValidator(const char* _type, const char* _op=0, const char* list=0, bool _insensitive=false);
    inline ezOptionValidator(char _type);
    inline ezOptionValidator(char _type, char _op, const char* list, int _size);
    inline ezOptionValidator(char _type, char _op, const unsigned char* list, int _size);
    inline ezOptionValidator(char _type, char _op, const short* list, int _size);
    inline ezOptionValidator(char _type, char _op, const unsigned short* list, int _size);
    inline ezOptionValidator(char _type, char _op, const int* list, int _size);
    inline ezOptionValidator(char _type, char _op, const unsigned int* list, int _size);
    inline ezOptionValidator(char _type, char _op, const long long* list, int _size);
    inline ezOptionValidator(char _type, char _op, const unsigned long long* list, int _size=0);
    inline ezOptionValidator(char _type, char _op, const float* list, int _size);
    inline ezOptionValidator(char _type, char _op, const double* list, int _size);
    inline ezOptionValidator(char _type, char _op, const char** list, int _size, bool _insensitive);
    // Calls reset().
    inline ~ezOptionValidator();

    // Checks a textual value against this validator (see the class comment).
    inline bool isValid(const std::string * value);
    inline void print();
    // Frees the value list and sets size, op and type back to 0/NOOP/NOTYPE.
    inline void reset();

    /* If value must be in custom range, use these comparison modes. */
    enum OP { NOOP=0,
        LT, /* value < list[0] */
        LE, /* value <= list[0] */
        GT, /* value > list[0] */
        GE, /* value >= list[0] */
        GTLT, /* list[0] < value < list[1] */
        GELT, /* list[0] <= value < list[1] */
        GELE, /* list[0] <= value <= list[1] */
        GTLE, /* list[0] < value <= list[1] */
        IN /* if value is in list */
    };

    // Element type codes; TYPE2 gives the same values under longer names.
    enum TYPE { NOTYPE=0, S1, U1, S2, U2, S4, U4, S8, U8, F, D, T };
    enum TYPE2 { NOTYPE2=0, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT, DOUBLE, TEXT };

    // Reference value list. Exactly one member is meaningful; reset() picks
    // it by `type` (S1 -> s1, U1 -> u1, ..., T -> t).
    union {
        unsigned char *u1;
        char *s1;
        unsigned short *u2;
        short *s2;
        unsigned int *u4;
        int *s4;
        unsigned long long *u8;
        long long *s8;
        float *f;
        double *d;
        std::string** t; // for T, reset() deletes each t[i] and then the array
    };

    char op;            // comparison mode; reset() sets it to NOOP (presumably an OP value - confirm)
    bool quiet;         // NOTE(review): purpose not visible in this part of the file
    short id;           // set from ezOptionParserIDGenerator by the constructor
    char type;          // element type code; reset() switches on it using TYPE values
    int size;           // number of entries in the list (reset() walks t[0..size-1] for T)
    bool insensitive;   // presumably case-insensitive comparison for text values - confirm
};
464 | +/* ------------------------------------------------------------------- */ | |
465 | +ezOptionValidator::~ezOptionValidator() { | |
466 | + reset(); | |
467 | +}; | |
468 | +/* ------------------------------------------------------------------- */ | |
469 | +void ezOptionValidator::reset() { | |
470 | + #define CLEAR(TYPE,P) case TYPE: if (P) delete [] P; P = 0; break; | |
471 | + switch(type) { | |
472 | + CLEAR(S1,s1); | |
473 | + CLEAR(U1,u1); | |
474 | + CLEAR(S2,s2); | |
475 | + CLEAR(U2,u2); | |
476 | + CLEAR(S4,s4); | |
477 | + CLEAR(U4,u4); | |
478 | + CLEAR(S8,s8); | |
479 | + CLEAR(U8,u8); | |
480 | + CLEAR(F,f); | |
481 | + CLEAR(D,d); | |
482 | + case T: | |
483 | + for(int i=0; i < size; ++i) | |
484 | + delete t[i]; | |
485 | + | |
486 | + delete [] t; | |
487 | + t = 0; | |
488 | + break; | |
489 | + default: break; | |
490 | + } | |
491 | + | |
492 | + size = 0; | |
493 | + op = NOOP; | |
494 | + type = NOTYPE; | |
495 | +}; | |
496 | +/* ------------------------------------------------------------------- */ | |
497 | +ezOptionValidator::ezOptionValidator(char _type) : insensitive(0), op(0), size(0), s1(0), type(_type), quiet(0) { | |
498 | + id = ezOptionParserIDGenerator::instance().next(); | |
499 | +}; | |
500 | +/* ------------------------------------------------------------------- */ | |
501 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const char* list, int _size) : insensitive(0), op(_op), size(_size), s1(0), type(_type), quiet(0) { | |
502 | + id = ezOptionParserIDGenerator::instance().next(); | |
503 | + s1 = new char[size]; | |
504 | + memcpy(s1, list, size); | |
505 | +}; | |
506 | +/* ------------------------------------------------------------------- */ | |
507 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned char* list, int _size) : insensitive(0), op(_op), size(_size), u1(0), type(_type), quiet(0) { | |
508 | + id = ezOptionParserIDGenerator::instance().next(); | |
509 | + u1 = new unsigned char[size]; | |
510 | + memcpy(u1, list, size); | |
511 | +}; | |
512 | +/* ------------------------------------------------------------------- */ | |
513 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const short* list, int _size) : insensitive(0), op(_op), size(_size), s2(0), type(_type), quiet(0) { | |
514 | + id = ezOptionParserIDGenerator::instance().next(); | |
515 | + s2 = new short[size]; | |
516 | + memcpy(s2, list, size*sizeof(short)); | |
517 | +}; | |
518 | +/* ------------------------------------------------------------------- */ | |
519 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned short* list, int _size) : insensitive(0), op(_op), size(_size), u2(0), type(_type), quiet(0) { | |
520 | + id = ezOptionParserIDGenerator::instance().next(); | |
521 | + u2 = new unsigned short[size]; | |
522 | + memcpy(u2, list, size*sizeof(unsigned short)); | |
523 | +}; | |
524 | +/* ------------------------------------------------------------------- */ | |
525 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const int* list, int _size) : insensitive(0), op(_op), size(_size), s4(0), type(_type), quiet(0) { | |
526 | + id = ezOptionParserIDGenerator::instance().next(); | |
527 | + s4 = new int[size]; | |
528 | + memcpy(s4, list, size*sizeof(int)); | |
529 | +}; | |
530 | +/* ------------------------------------------------------------------- */ | |
531 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned int* list, int _size) : insensitive(0), op(_op), size(_size), u4(0), type(_type), quiet(0) { | |
532 | + id = ezOptionParserIDGenerator::instance().next(); | |
533 | + u4 = new unsigned int[size]; | |
534 | + memcpy(u4, list, size*sizeof(unsigned int)); | |
535 | +}; | |
536 | +/* ------------------------------------------------------------------- */ | |
537 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const long long* list, int _size) : insensitive(0), op(_op), size(_size), s8(0), type(_type), quiet(0) { | |
538 | + id = ezOptionParserIDGenerator::instance().next(); | |
539 | + s8 = new long long[size]; | |
540 | + memcpy(s8, list, size*sizeof(long long)); | |
541 | +}; | |
542 | +/* ------------------------------------------------------------------- */ | |
543 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned long long* list, int _size) : insensitive(0), op(_op), size(_size), u8(0), type(_type), quiet(0) { | |
544 | + id = ezOptionParserIDGenerator::instance().next(); | |
545 | + u8 = new unsigned long long[size]; | |
546 | + memcpy(u8, list, size*sizeof(unsigned long long)); | |
547 | +}; | |
548 | +/* ------------------------------------------------------------------- */ | |
549 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const float* list, int _size) : insensitive(0), op(_op), size(_size), f(0), type(_type), quiet(0) { | |
550 | + id = ezOptionParserIDGenerator::instance().next(); | |
551 | + f = new float[size]; | |
552 | + memcpy(f, list, size*sizeof(float)); | |
553 | +}; | |
554 | +/* ------------------------------------------------------------------- */ | |
555 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const double* list, int _size) : insensitive(0), op(_op), size(_size), d(0), type(_type), quiet(0) { | |
556 | + id = ezOptionParserIDGenerator::instance().next(); | |
557 | + d = new double[size]; | |
558 | + memcpy(d, list, size*sizeof(double)); | |
559 | +}; | |
560 | +/* ------------------------------------------------------------------- */ | |
561 | +ezOptionValidator::ezOptionValidator(char _type, char _op, const char** list, int _size, bool _insensitive) : insensitive(_insensitive), op(_op), size(_size), t(0), type(_type), quiet(0) { | |
562 | + id = ezOptionParserIDGenerator::instance().next(); | |
563 | + t = new std::string*[size]; | |
564 | + int i=0; | |
565 | + | |
566 | + for(; i < size; ++i) { | |
567 | + t[i] = new std::string(list[i]); | |
568 | + } | |
569 | +}; | |
570 | +/* ------------------------------------------------------------------- */ | |
571 | +/* Less efficient but convenient ctor that parses strings to setup validator. | |
572 | +_type: s1, u1, s2, u2, ..., f, d, t | |
573 | +_op: lt, gt, ..., in | |
574 | +_list: comma-delimited string | |
575 | +*/ | |
576 | +ezOptionValidator::ezOptionValidator(const char* _type, const char* _op, const char* _list, bool _insensitive) : insensitive(_insensitive), size(0), t(0), type(0), quiet(0) { | |
577 | + id = ezOptionParserIDGenerator::instance().next(); | |
578 | + | |
579 | + switch(_type[0]) { | |
580 | + case 'u': | |
581 | + switch(_type[1]) { | |
582 | + case '1': type = U1; break; | |
583 | + case '2': type = U2; break; | |
584 | + case '4': type = U4; break; | |
585 | + case '8': type = U8; break; | |
586 | + default: break; | |
587 | + } | |
588 | + break; | |
589 | + case 's': | |
590 | + switch(_type[1]) { | |
591 | + case '1': type = S1; | |
592 | + break; | |
593 | + case '2': type = S2; break; | |
594 | + case '4': type = S4; break; | |
595 | + case '8': type = S8; break; | |
596 | + default: break; | |
597 | + } | |
598 | + break; | |
599 | + case 'f': type = F; break; | |
600 | + case 'd': type = D; break; | |
601 | + case 't': type = T; break; | |
602 | + default: | |
603 | + if (!quiet) | |
604 | + std::cerr << "ERROR: Unknown validator datatype \"" << _type << "\".\n"; | |
605 | + break; | |
606 | + } | |
607 | + | |
608 | + int nop = 0; | |
609 | + if (_op != 0) | |
610 | + nop = strlen(_op); | |
611 | + | |
612 | + switch(nop) { | |
613 | + case 0: op = NOOP; break; | |
614 | + case 2: | |
615 | + switch(_op[0]) { | |
616 | + case 'g': | |
617 | + switch(_op[1]) { | |
618 | + case 'e': op = GE; break; | |
619 | + default: op = GT; break; | |
620 | + } | |
621 | + break; | |
622 | + case 'i': op = IN; | |
623 | + break; | |
624 | + default: | |
625 | + switch(_op[1]) { | |
626 | + case 'e': op = LE; break; | |
627 | + default: op = LT; break; | |
628 | + } | |
629 | + break; | |
630 | + } | |
631 | + break; | |
632 | + case 4: | |
633 | + switch(_op[1]) { | |
634 | + case 'e': | |
635 | + switch(_op[3]) { | |
636 | + case 'e': op = GELE; break; | |
637 | + default: op = GELT; break; | |
638 | + } | |
639 | + break; | |
640 | + default: | |
641 | + switch(_op[3]) { | |
642 | + case 'e': op = GTLE; break; | |
643 | + default: op = GTLT; break; | |
644 | + } | |
645 | + break; | |
646 | + } | |
647 | + break; | |
648 | + default: | |
649 | + if (!quiet) | |
650 | + std::cerr << "ERROR: Unknown validator operation \"" << _op << "\".\n"; | |
651 | + break; | |
652 | + } | |
653 | + | |
654 | + if (_list == 0) return; | |
655 | + // Create list of strings and then cast to native datatypes. | |
656 | + std::string unsplit(_list); | |
657 | + std::list<std::string*> split; | |
658 | + std::list<std::string*>::iterator it; | |
659 | + SplitDelim(unsplit, ',', split); | |
660 | + size = split.size(); | |
661 | + std::string **strings = new std::string*[size]; | |
662 | + | |
663 | + int i = 0; | |
664 | + for(it = split.begin(); it != split.end(); ++it) | |
665 | + strings[i++] = *it; | |
666 | + | |
667 | + if (insensitive) | |
668 | + for(i=0; i < size; ++i) | |
669 | + ToLowerASCII(*strings[i]); | |
670 | + | |
671 | + #define FreeStrings() { \ | |
672 | + for(i=0; i < size; ++i)\ | |
673 | + delete strings[i];\ | |
674 | + delete [] strings;\ | |
675 | + } | |
676 | + | |
677 | + #define ToArray(T,P,Y) case T: P = new Y[size]; To##T(strings, P, size); FreeStrings(); break; | |
678 | + switch(type) { | |
679 | + ToArray(S1,s1,char); | |
680 | + ToArray(U1,u1,unsigned char); | |
681 | + ToArray(S2,s2,short); | |
682 | + ToArray(U2,u2,unsigned short); | |
683 | + ToArray(S4,s4,int); | |
684 | + ToArray(U4,u4,unsigned int); | |
685 | + ToArray(S8,s8,long long); | |
686 | + ToArray(U8,u8,unsigned long long); | |
687 | + ToArray(F,f,float); | |
688 | + ToArray(D,d,double); | |
689 | + case T: t = strings; break; /* Don't erase strings array. */ | |
690 | + default: break; | |
691 | + } | |
692 | +}; | |
693 | +/* ------------------------------------------------------------------- */ | |
694 | +void ezOptionValidator::print() { | |
695 | + printf("id=%d, op=%d, type=%d, size=%d, insensitive=%d\n", id, op, type, size, insensitive); | |
696 | +}; | |
697 | +/* ------------------------------------------------------------------- */ | |
698 | +bool ezOptionValidator::isValid(const std::string * valueAsString) { | |
699 | + if (valueAsString == 0) return false; | |
700 | + | |
701 | +#define CHECKRANGE(E,T) {\ | |
702 | + std::stringstream ss(valueAsString->c_str()); \ | |
703 | + long long E##value; \ | |
704 | + ss >> E##value; \ | |
705 | + long long E##min = static_cast<long long>(std::numeric_limits<T>::min()); \ | |
706 | + if (E##value < E##min) { \ | |
707 | + if (!quiet) \ | |
708 | + std::cerr << "ERROR: Invalid value " << E##value << " is less than datatype min " << E##min << ".\n"; \ | |
709 | + return false; \ | |
710 | + } \ | |
711 | + \ | |
712 | + long long E##max = static_cast<long long>(std::numeric_limits<T>::max()); \ | |
713 | + if (E##value > E##max) { \ | |
714 | + if (!quiet) \ | |
715 | + std::cerr << "ERROR: Invalid value " << E##value << " is greater than datatype max " << E##max << ".\n"; \ | |
716 | + return false; \ | |
717 | + } \ | |
718 | +} | |
719 | + // Check if within datatype limits. | |
720 | + if (type != T) { | |
721 | + switch(type) { | |
722 | + case S1: CHECKRANGE(S1,char); break; | |
723 | + case U1: CHECKRANGE(U1,unsigned char); break; | |
724 | + case S2: CHECKRANGE(S2,short); break; | |
725 | + case U2: CHECKRANGE(U2,unsigned short); break; | |
726 | + case S4: CHECKRANGE(S4,int); break; | |
727 | + case U4: CHECKRANGE(U4,unsigned int); break; | |
728 | + case S8: { | |
729 | + if ( (valueAsString->at(0) == '-') && | |
730 | + isdigit(valueAsString,1) && | |
731 | + (valueAsString->size() > 19) && | |
732 | + (valueAsString->compare(1, 19, "9223372036854775808") > 0) ) { | |
733 | + if (!quiet) | |
734 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is less than datatype min -9223372036854775808.\n"; | |
735 | + return false; | |
736 | + } | |
737 | + | |
738 | + if (isdigit(valueAsString) && | |
739 | + (valueAsString->size() > 18) && | |
740 | + valueAsString->compare("9223372036854775807") > 0) { | |
741 | + if (!quiet) | |
742 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is greater than datatype max 9223372036854775807.\n"; | |
743 | + return false; | |
744 | + } | |
745 | + } break; | |
746 | + case U8: { | |
747 | + if (valueAsString->compare("0") < 0) { | |
748 | + if (!quiet) | |
749 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is less than datatype min 0.\n"; | |
750 | + return false; | |
751 | + } | |
752 | + | |
753 | + if (isdigit(valueAsString) && | |
754 | + (valueAsString->size() > 19) && | |
755 | + valueAsString->compare("18446744073709551615") > 0) { | |
756 | + if (!quiet) | |
757 | + std::cerr << "ERROR: Invalid value " << *valueAsString << " is greater than datatype max 18446744073709551615.\n"; | |
758 | + return false; | |
759 | + } | |
760 | + } break; | |
761 | + case F: { | |
762 | + double dmax = static_cast<double>(std::numeric_limits<float>::max()); | |
763 | + double dvalue = atof(valueAsString->c_str()); | |
764 | + double dmin = -dmax; | |
765 | + if (dvalue < dmin) { | |
766 | + if (!quiet) { | |
767 | + fprintf(stderr, "ERROR: Invalid value %g is less than datatype min %g.\n", dvalue, dmin); | |
768 | + } | |
769 | + return false; | |
770 | + } | |
771 | + | |
772 | + if (dvalue > dmax) { | |
773 | + if (!quiet) | |
774 | + std::cerr << "ERROR: Invalid value " << dvalue << " is greater than datatype max " << dmax << ".\n"; | |
775 | + return false; | |
776 | + } | |
777 | + } break; | |
778 | + case D: { | |
779 | + long double ldmax = static_cast<long double>(std::numeric_limits<double>::max()); | |
780 | + std::stringstream ss(valueAsString->c_str()); | |
781 | + long double ldvalue; | |
782 | + ss >> ldvalue; | |
783 | + long double ldmin = -ldmax; | |
784 | + | |
785 | + if (ldvalue < ldmin) { | |
786 | + if (!quiet) | |
787 | + std::cerr << "ERROR: Invalid value " << ldvalue << " is less than datatype min " << ldmin << ".\n"; | |
788 | + return false; | |
789 | + } | |
790 | + | |
791 | + if (ldvalue > ldmax) { | |
792 | + if (!quiet) | |
793 | + std::cerr << "ERROR: Invalid value " << ldvalue << " is greater than datatype max " << ldmax << ".\n"; | |
794 | + return false; | |
795 | + } | |
796 | + } break; | |
797 | + case NOTYPE: default: break; | |
798 | + } | |
799 | + } else { | |
800 | + if (op == IN) { | |
801 | + int i=0; | |
802 | + if (insensitive) { | |
803 | + std::string valueAsStringLower(*valueAsString); | |
804 | + ToLowerASCII(valueAsStringLower); | |
805 | + for(; i < size; ++i) { | |
806 | + if (valueAsStringLower.compare(t[i]->c_str()) == 0) | |
807 | + return true; | |
808 | + } | |
809 | + } else { | |
810 | + for(; i < size; ++i) { | |
811 | + if (valueAsString->compare(t[i]->c_str()) == 0) | |
812 | + return true; | |
813 | + } | |
814 | + } | |
815 | + return false; | |
816 | + } | |
817 | + } | |
818 | + | |
819 | + // Only check datatype limits, and return; | |
820 | + if (op == NOOP) return true; | |
821 | + | |
822 | +#define VALIDATE(T, U, LIST) { \ | |
823 | + /* Value string converted to true native type. */ \ | |
824 | + std::stringstream ss(valueAsString->c_str());\ | |
825 | + U v;\ | |
826 | + ss >> v;\ | |
827 | + /* Check if within list. */ \ | |
828 | + if (op == IN) { \ | |
829 | + T * last = LIST + size;\ | |
830 | + return (last != std::find(LIST, last, v)); \ | |
831 | + } \ | |
832 | + \ | |
833 | + /* Check if within user's custom range. */ \ | |
834 | + T v0, v1; \ | |
835 | + if (size > 0) { \ | |
836 | + v0 = LIST[0]; \ | |
837 | + } \ | |
838 | + \ | |
839 | + if (size > 1) { \ | |
840 | + v1 = LIST[1]; \ | |
841 | + } \ | |
842 | + \ | |
843 | + switch (op) {\ | |
844 | + case LT:\ | |
845 | + if (size > 0) {\ | |
846 | + return v < v0;\ | |
847 | + } else {\ | |
848 | + std::cerr << "ERROR: No value given to validate if " << v << " < X.\n";\ | |
849 | + return false;\ | |
850 | + }\ | |
851 | + break;\ | |
852 | + case LE:\ | |
853 | + if (size > 0) {\ | |
854 | + return v <= v0;\ | |
855 | + } else {\ | |
856 | + std::cerr << "ERROR: No value given to validate if " << v << " <= X.\n";\ | |
857 | + return false;\ | |
858 | + }\ | |
859 | + break;\ | |
860 | + case GT:\ | |
861 | + if (size > 0) {\ | |
862 | + return v > v0;\ | |
863 | + } else {\ | |
864 | + std::cerr << "ERROR: No value given to validate if " << v << " > X.\n";\ | |
865 | + return false;\ | |
866 | + }\ | |
867 | + break;\ | |
868 | + case GE:\ | |
869 | + if (size > 0) {\ | |
870 | + return v >= v0;\ | |
871 | + } else {\ | |
872 | + std::cerr << "ERROR: No value given to validate if " << v << " >= X.\n";\ | |
873 | + return false;\ | |
874 | + }\ | |
875 | + break;\ | |
876 | + case GTLT:\ | |
877 | + if (size > 1) {\ | |
878 | + return (v0 < v) && (v < v1);\ | |
879 | + } else {\ | |
880 | + std::cerr << "ERROR: Missing values to validate if X1 < " << v << " < X2.\n";\ | |
881 | + return false;\ | |
882 | + }\ | |
883 | + break;\ | |
884 | + case GELT:\ | |
885 | + if (size > 1) {\ | |
886 | + return (v0 <= v) && (v < v1);\ | |
887 | + } else {\ | |
888 | + std::cerr << "ERROR: Missing values to validate if X1 <= " << v << " < X2.\n";\ | |
889 | + return false;\ | |
890 | + }\ | |
891 | + break;\ | |
892 | + case GELE:\ | |
893 | + if (size > 1) {\ | |
894 | + return (v0 <= v) && (v <= v1);\ | |
895 | + } else {\ | |
896 | + std::cerr << "ERROR: Missing values to validate if X1 <= " << v << " <= X2.\n";\ | |
897 | + return false;\ | |
898 | + }\ | |
899 | + break;\ | |
900 | + case GTLE:\ | |
901 | + if (size > 1) {\ | |
902 | + return (v0 < v) && (v <= v1);\ | |
903 | + } else {\ | |
904 | + std::cerr << "ERROR: Missing values to validate if X1 < " << v << " <= X2.\n";\ | |
905 | + return false;\ | |
906 | + }\ | |
907 | + break;\ | |
908 | + case NOOP: case IN: default: break;\ | |
909 | + } \ | |
910 | + } | |
911 | + | |
912 | + switch(type) { | |
913 | + case U1: VALIDATE(unsigned char, int, u1); break; | |
914 | + case S1: VALIDATE(char, int, s1); break; | |
915 | + case U2: VALIDATE(unsigned short, int, u2); break; | |
916 | + case S2: VALIDATE(short, int, s2); break; | |
917 | + case U4: VALIDATE(unsigned int, unsigned int, u4); break; | |
918 | + case S4: VALIDATE(int, int, s4); break; | |
919 | + case U8: VALIDATE(unsigned long long, unsigned long long, u8); break; | |
920 | + case S8: VALIDATE(long long, long long, s8); break; | |
921 | + case F: VALIDATE(float, float, f); break; | |
922 | + case D: VALIDATE(double, double, d); break; | |
923 | + default: break; | |
924 | + } | |
925 | + | |
926 | + return true; | |
927 | +}; | |
928 | +/* ################################################################### */ | |
/* One command line option: its flags, help text, defaults, and the argument
   lists collected for each occurrence of the option. */
class OptionGroup {
public:
  OptionGroup() : delim(0), expectArgs(0), isRequired(false), isSet(false) { }

  /* Frees the flag strings and every stored argument string. */
  ~OptionGroup() {
    for (std::vector< std::string* >::iterator flag = flags.begin(); flag != flags.end(); ++flag)
      delete *flag;

    flags.clear();
    parseIndex.clear();
    clearArgs();
  }

  inline void clearArgs();

  /* Single value accessors. */
  inline void getInt(int&);
  inline void getLong(long&);
  inline void getLongLong(long long&);
  inline void getULong(unsigned long&);
  inline void getULongLong(unsigned long long&);
  inline void getFloat(float&);
  inline void getDouble(double&);
  inline void getString(std::string&);

  /* Accessors for the argument list of the first occurrence. */
  inline void getInts(std::vector<int>&);
  inline void getLongs(std::vector<long>&);
  inline void getULongs(std::vector<unsigned long>&);
  inline void getFloats(std::vector<float>&);
  inline void getDoubles(std::vector<double>&);
  inline void getStrings(std::vector<std::string>&);

  /* Accessors for the argument lists of every occurrence. */
  inline void getMultiInts(std::vector< std::vector<int> >&);
  inline void getMultiLongs(std::vector< std::vector<long> >&);
  inline void getMultiULongs(std::vector< std::vector<unsigned long> >&);
  inline void getMultiFloats(std::vector< std::vector<float> >&);
  inline void getMultiDoubles(std::vector< std::vector<double> >&);
  inline void getMultiStrings(std::vector< std::vector<std::string> >&);

  // defaults value regardless of being set by user.
  std::string defaults;
  // If expects arguments, this will delimit arg list.
  char delim;
  // If not 0, then number of delimited args. -1 for arbitrary number.
  int expectArgs;
  // Descriptive help message shown in usage instructions for option.
  std::string help;
  // 0 or 1.
  bool isRequired;
  // A list of flags that denote this option, i.e. -d, --dimension.
  std::vector< std::string* > flags;
  // If was set (or found).
  bool isSet;
  // Lists of arguments, per flag instance, after splitting by delimiter.
  std::vector< std::vector< std::string* > * > args;
  // Index where each group was parsed from input stream to track order.
  std::vector<int> parseIndex;
};
984 | +/* ################################################################### */ | |
985 | +void OptionGroup::clearArgs() { | |
986 | + int i,j; | |
987 | + for(i=0; i < args.size(); ++i) { | |
988 | + for(j=0; j < args[i]->size(); ++j) | |
989 | + delete args[i]->at(j); | |
990 | + | |
991 | + delete args[i]; | |
992 | + } | |
993 | + | |
994 | + args.clear(); | |
995 | + isSet = false; | |
996 | +}; | |
997 | +/* ################################################################### */ | |
998 | +void OptionGroup::getInt(int & out) { | |
999 | + if (!isSet) { | |
1000 | + if (defaults.empty()) | |
1001 | + out = 0; | |
1002 | + else | |
1003 | + out = atoi(defaults.c_str()); | |
1004 | + } else { | |
1005 | + if (args.empty() || args[0]->empty()) | |
1006 | + out = 0; | |
1007 | + else { | |
1008 | + out = atoi(args[0]->at(0)->c_str()); | |
1009 | + } | |
1010 | + } | |
1011 | +}; | |
1012 | +/* ################################################################### */ | |
1013 | +void OptionGroup::getLong(long & out) { | |
1014 | + if (!isSet) { | |
1015 | + if (defaults.empty()) | |
1016 | + out = 0; | |
1017 | + else | |
1018 | + out = atoi(defaults.c_str()); | |
1019 | + } else { | |
1020 | + if (args.empty() || args[0]->empty()) | |
1021 | + out = 0; | |
1022 | + else { | |
1023 | + out = atol(args[0]->at(0)->c_str()); | |
1024 | + } | |
1025 | + } | |
1026 | +}; | |
1027 | +/* ################################################################### */ | |
1028 | +void OptionGroup::getLongLong(long long & out) { | |
1029 | + if (!isSet) { | |
1030 | + if (defaults.empty()) | |
1031 | + out = 0; | |
1032 | + else { | |
1033 | + std::stringstream ss(defaults.c_str()); | |
1034 | + ss >> out; | |
1035 | + } | |
1036 | + } else { | |
1037 | + if (args.empty() || args[0]->empty()) | |
1038 | + out = 0; | |
1039 | + else { | |
1040 | + std::stringstream ss(args[0]->at(0)->c_str()); | |
1041 | + ss >> out; | |
1042 | + } | |
1043 | + } | |
1044 | +}; | |
1045 | +/* ################################################################### */ | |
1046 | +void OptionGroup::getULong(unsigned long & out) { | |
1047 | + if (!isSet) { | |
1048 | + if (defaults.empty()) | |
1049 | + out = 0; | |
1050 | + else | |
1051 | + out = atoi(defaults.c_str()); | |
1052 | + } else { | |
1053 | + if (args.empty() || args[0]->empty()) | |
1054 | + out = 0; | |
1055 | + else { | |
1056 | + out = strtoul(args[0]->at(0)->c_str(),0,0); | |
1057 | + } | |
1058 | + } | |
1059 | +}; | |
1060 | +/* ################################################################### */ | |
1061 | +void OptionGroup::getULongLong(unsigned long long & out) { | |
1062 | + if (!isSet) { | |
1063 | + if (defaults.empty()) | |
1064 | + out = 0; | |
1065 | + else { | |
1066 | + std::stringstream ss(defaults.c_str()); | |
1067 | + ss >> out; | |
1068 | + } | |
1069 | + } else { | |
1070 | + if (args.empty() || args[0]->empty()) | |
1071 | + out = 0; | |
1072 | + else { | |
1073 | + std::stringstream ss(args[0]->at(0)->c_str()); | |
1074 | + ss >> out; | |
1075 | + } | |
1076 | + } | |
1077 | +}; | |
1078 | +/* ################################################################### */ | |
1079 | +void OptionGroup::getFloat(float & out) { | |
1080 | + if (!isSet) { | |
1081 | + if (defaults.empty()) | |
1082 | + out = 0.0; | |
1083 | + else | |
1084 | + out = (float)atof(defaults.c_str()); | |
1085 | + } else { | |
1086 | + if (args.empty() || args[0]->empty()) | |
1087 | + out = 0.0; | |
1088 | + else { | |
1089 | + out = (float)atof(args[0]->at(0)->c_str()); | |
1090 | + } | |
1091 | + } | |
1092 | +}; | |
1093 | +/* ################################################################### */ | |
1094 | +void OptionGroup::getDouble(double & out) { | |
1095 | + if (!isSet) { | |
1096 | + if (defaults.empty()) | |
1097 | + out = 0.0; | |
1098 | + else | |
1099 | + out = atof(defaults.c_str()); | |
1100 | + } else { | |
1101 | + if (args.empty() || args[0]->empty()) | |
1102 | + out = 0.0; | |
1103 | + else { | |
1104 | + out = atof(args[0]->at(0)->c_str()); | |
1105 | + } | |
1106 | + } | |
1107 | +}; | |
1108 | +/* ################################################################### */ | |
1109 | +void OptionGroup::getString(std::string & out) { | |
1110 | + if (!isSet) { | |
1111 | + out = defaults; | |
1112 | + } else { | |
1113 | + if (args.empty() || args[0]->empty()) | |
1114 | + out = ""; | |
1115 | + else { | |
1116 | + out = *args[0]->at(0); | |
1117 | + } | |
1118 | + } | |
1119 | +}; | |
1120 | +/* ################################################################### */ | |
1121 | +void OptionGroup::getInts(std::vector<int> & out) { | |
1122 | + if (!isSet) { | |
1123 | + if (!defaults.empty()) { | |
1124 | + std::vector< std::string > strings; | |
1125 | + SplitDelim(defaults, delim, strings); | |
1126 | + StringsToInts(strings, out); | |
1127 | + } | |
1128 | + } else { | |
1129 | + if (!(args.empty() || args[0]->empty())) | |
1130 | + StringsToInts(args[0], &out); | |
1131 | + } | |
1132 | +}; | |
1133 | +/* ################################################################### */ | |
1134 | +void OptionGroup::getLongs(std::vector<long> & out) { | |
1135 | + if (!isSet) { | |
1136 | + if (!defaults.empty()) { | |
1137 | + std::vector< std::string > strings; | |
1138 | + SplitDelim(defaults, delim, strings); | |
1139 | + StringsToLongs(strings, out); | |
1140 | + } | |
1141 | + } else { | |
1142 | + if (!(args.empty() || args[0]->empty())) | |
1143 | + StringsToLongs(args[0], &out); | |
1144 | + } | |
1145 | +}; | |
1146 | +/* ################################################################### */ | |
1147 | +void OptionGroup::getULongs(std::vector<unsigned long> & out) { | |
1148 | + if (!isSet) { | |
1149 | + if (!defaults.empty()) { | |
1150 | + std::vector< std::string > strings; | |
1151 | + SplitDelim(defaults, delim, strings); | |
1152 | + StringsToULongs(strings, out); | |
1153 | + } | |
1154 | + } else { | |
1155 | + if (!(args.empty() || args[0]->empty())) | |
1156 | + StringsToULongs(args[0], &out); | |
1157 | + } | |
1158 | +}; | |
1159 | +/* ################################################################### */ | |
1160 | +void OptionGroup::getFloats(std::vector<float> & out) { | |
1161 | + if (!isSet) { | |
1162 | + if (!defaults.empty()) { | |
1163 | + std::vector< std::string > strings; | |
1164 | + SplitDelim(defaults, delim, strings); | |
1165 | + StringsToFloats(strings, out); | |
1166 | + } | |
1167 | + } else { | |
1168 | + if (!(args.empty() || args[0]->empty())) | |
1169 | + StringsToFloats(args[0], &out); | |
1170 | + } | |
1171 | +}; | |
1172 | +/* ################################################################### */ | |
1173 | +void OptionGroup::getDoubles(std::vector<double> & out) { | |
1174 | + if (!isSet) { | |
1175 | + if (!defaults.empty()) { | |
1176 | + std::vector< std::string > strings; | |
1177 | + SplitDelim(defaults, delim, strings); | |
1178 | + StringsToDoubles(strings, out); | |
1179 | + } | |
1180 | + } else { | |
1181 | + if (!(args.empty() || args[0]->empty())) | |
1182 | + StringsToDoubles(args[0], &out); | |
1183 | + } | |
1184 | +}; | |
1185 | +/* ################################################################### */ | |
1186 | +void OptionGroup::getStrings(std::vector<std::string>& out) { | |
1187 | + if (!isSet) { | |
1188 | + if (!defaults.empty()) { | |
1189 | + SplitDelim(defaults, delim, out); | |
1190 | + } | |
1191 | + } else { | |
1192 | + if (!(args.empty() || args[0]->empty())) | |
1193 | + StringsToStrings(args[0], &out); | |
1194 | + } | |
1195 | +}; | |
1196 | +/* ################################################################### */ | |
1197 | +void OptionGroup::getMultiInts(std::vector< std::vector<int> >& out) { | |
1198 | + if (!isSet) { | |
1199 | + if (!defaults.empty()) { | |
1200 | + std::vector< std::string > strings; | |
1201 | + SplitDelim(defaults, delim, strings); | |
1202 | + if (out.size() < 1) out.resize(1); | |
1203 | + StringsToInts(strings, out[0]); | |
1204 | + } | |
1205 | + } else { | |
1206 | + if (!args.empty()) { | |
1207 | + int n = args.size(); | |
1208 | + if (out.size() < n) out.resize(n); | |
1209 | + for(int i=0; i < n; ++i) { | |
1210 | + StringsToInts(args[i], &out[i]); | |
1211 | + } | |
1212 | + } | |
1213 | + } | |
1214 | +}; | |
1215 | +/* ################################################################### */ | |
1216 | +void OptionGroup::getMultiLongs(std::vector< std::vector<long> >& out) { | |
1217 | + if (!isSet) { | |
1218 | + if (!defaults.empty()) { | |
1219 | + std::vector< std::string > strings; | |
1220 | + SplitDelim(defaults, delim, strings); | |
1221 | + if (out.size() < 1) out.resize(1); | |
1222 | + StringsToLongs(strings, out[0]); | |
1223 | + } | |
1224 | + } else { | |
1225 | + if (!args.empty()) { | |
1226 | + int n = args.size(); | |
1227 | + if (out.size() < n) out.resize(n); | |
1228 | + for(int i=0; i < n; ++i) { | |
1229 | + StringsToLongs(args[i], &out[i]); | |
1230 | + } | |
1231 | + } | |
1232 | + } | |
1233 | +}; | |
1234 | +/* ################################################################### */ | |
1235 | +void OptionGroup::getMultiULongs(std::vector< std::vector<unsigned long> >& out) { | |
1236 | + if (!isSet) { | |
1237 | + if (!defaults.empty()) { | |
1238 | + std::vector< std::string > strings; | |
1239 | + SplitDelim(defaults, delim, strings); | |
1240 | + if (out.size() < 1) out.resize(1); | |
1241 | + StringsToULongs(strings, out[0]); | |
1242 | + } | |
1243 | + } else { | |
1244 | + if (!args.empty()) { | |
1245 | + int n = args.size(); | |
1246 | + if (out.size() < n) out.resize(n); | |
1247 | + for(int i=0; i < n; ++i) { | |
1248 | + StringsToULongs(args[i], &out[i]); | |
1249 | + } | |
1250 | + } | |
1251 | + } | |
1252 | +}; | |
1253 | +/* ################################################################### */ | |
1254 | +void OptionGroup::getMultiFloats(std::vector< std::vector<float> >& out) { | |
1255 | + if (!isSet) { | |
1256 | + if (!defaults.empty()) { | |
1257 | + std::vector< std::string > strings; | |
1258 | + SplitDelim(defaults, delim, strings); | |
1259 | + if (out.size() < 1) out.resize(1); | |
1260 | + StringsToFloats(strings, out[0]); | |
1261 | + } | |
1262 | + } else { | |
1263 | + if (!args.empty()) { | |
1264 | + int n = args.size(); | |
1265 | + if (out.size() < n) out.resize(n); | |
1266 | + for(int i=0; i < n; ++i) { | |
1267 | + StringsToFloats(args[i], &out[i]); | |
1268 | + } | |
1269 | + } | |
1270 | + } | |
1271 | +}; | |
1272 | +/* ################################################################### */ | |
1273 | +void OptionGroup::getMultiDoubles(std::vector< std::vector<double> >& out) { | |
1274 | + if (!isSet) { | |
1275 | + if (!defaults.empty()) { | |
1276 | + std::vector< std::string > strings; | |
1277 | + SplitDelim(defaults, delim, strings); | |
1278 | + if (out.size() < 1) out.resize(1); | |
1279 | + StringsToDoubles(strings, out[0]); | |
1280 | + } | |
1281 | + } else { | |
1282 | + if (!args.empty()) { | |
1283 | + int n = args.size(); | |
1284 | + if (out.size() < n) out.resize(n); | |
1285 | + for(int i=0; i < n; ++i) { | |
1286 | + StringsToDoubles(args[i], &out[i]); | |
1287 | + } | |
1288 | + } | |
1289 | + } | |
1290 | +}; | |
1291 | +/* ################################################################### */ | |
1292 | +void OptionGroup::getMultiStrings(std::vector< std::vector<std::string> >& out) { | |
1293 | + if (!isSet) { | |
1294 | + if (!defaults.empty()) { | |
1295 | + std::vector< std::string > strings; | |
1296 | + SplitDelim(defaults, delim, strings); | |
1297 | + if (out.size() < 1) out.resize(1); | |
1298 | + out[0] = strings; | |
1299 | + } | |
1300 | + } else { | |
1301 | + if (!args.empty()) { | |
1302 | + int n = args.size(); | |
1303 | + if (out.size() < n) out.resize(n); | |
1304 | + | |
1305 | + for(int i=0; i < n; ++i) { | |
1306 | + for(int j=0; j < args[i]->size(); ++j) | |
1307 | + out[i].push_back( *args[i]->at(j) ); | |
1308 | + } | |
1309 | + } | |
1310 | + } | |
1311 | +}; | |
1312 | +/* ################################################################### */ | |
1313 | +typedef std::map< int, ezOptionValidator* > ValidatorMap; | |
1314 | + | |
1315 | +class ezOptionParser { | |
1316 | +public: | |
1317 | + // How to layout usage descriptions with the option flags. | |
1318 | + enum Layout { ALIGN, INTERLEAVE, STAGGER }; | |
1319 | + | |
1320 | + inline ~ezOptionParser(); | |
1321 | + | |
1322 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, ezOptionValidator* validator=0); | |
1323 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, ezOptionValidator* validator=0); | |
1324 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, ezOptionValidator* validator=0); | |
1325 | + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, const char * flag4, ezOptionValidator* validator=0); | |
1326 | + inline bool exportFile(const char * filename, bool all=false); | |
1327 | + inline OptionGroup * get(const char * name); | |
1328 | + inline void getUsage(std::string & usage, int width=80, Layout layout=ALIGN); | |
1329 | + inline void getUsageDescriptions(std::string & usage, int width=80, Layout layout=STAGGER); | |
1330 | + inline bool gotExpected(std::vector<std::string> & badOptions); | |
1331 | + inline bool gotRequired(std::vector<std::string> & badOptions); | |
1332 | + inline bool gotValid(std::vector<std::string> & badOptions, std::vector<std::string> & badArgs); | |
1333 | + inline bool importFile(const char * filename, char comment='#'); | |
1334 | + inline int isSet(const char * name); | |
1335 | + inline int isSet(std::string & name); | |
1336 | + inline void parse(int argc, const char * argv[]); | |
1337 | + inline void prettyPrint(std::string & out); | |
1338 | + inline void reset(); | |
1339 | + inline void resetArgs(); | |
1340 | + | |
1341 | + // Insert extra empty line betwee each option's usage description. | |
1342 | + char doublespace; | |
1343 | + // General description in human language on what the user's tool does. | |
1344 | + // It's the first section to get printed in the full usage message. | |
1345 | + std::string overview; | |
1346 | + // A synopsis of command and options usage to show expected order of input arguments. | |
1347 | + // It's the second section to get printed in the full usage message. | |
1348 | + std::string syntax; | |
1349 | + // Example (third) section in usage message. | |
1350 | + std::string example; | |
1351 | + // Final section printed in usage message. For contact, copyrights, version info. | |
1352 | + std::string footer; | |
1353 | + // Map from an option to an Id of its parent group. | |
1354 | + std::map< std::string, int > optionGroupIds; | |
1355 | + // Unordered collection of the option groups. | |
1356 | + std::vector< OptionGroup* > groups; | |
1357 | + // Store unexpected args in input. | |
1358 | + std::vector< std::string* > unknownArgs; | |
1359 | + // List of args that occur left-most before first option flag. | |
1360 | + std::vector< std::string* > firstArgs; | |
1361 | + // List of args that occur after last right-most option flag and its args. | |
1362 | + std::vector< std::string* > lastArgs; | |
1363 | + // List of validators. | |
1364 | + ValidatorMap validators; | |
1365 | + // Maps group id to a validator index into vector of validators. Validator index is -1 if there is no validator for group. | |
1366 | + std::map< int, int > groupValidators; | |
1367 | +}; | |
1368 | +/* ################################################################### */ | |
1369 | +ezOptionParser::~ezOptionParser() { | |
1370 | + reset(); | |
1371 | +} | |
1372 | +/* ################################################################### */ | |
1373 | +void ezOptionParser::reset() { | |
1374 | + this->doublespace = 1; | |
1375 | + | |
1376 | + int i; | |
1377 | + for(i=0; i < groups.size(); ++i) | |
1378 | + delete groups[i]; | |
1379 | + groups.clear(); | |
1380 | + | |
1381 | + for(i=0; i < unknownArgs.size(); ++i) | |
1382 | + delete unknownArgs[i]; | |
1383 | + unknownArgs.clear(); | |
1384 | + | |
1385 | + for(i=0; i < firstArgs.size(); ++i) | |
1386 | + delete firstArgs[i]; | |
1387 | + firstArgs.clear(); | |
1388 | + | |
1389 | + for(i=0; i < lastArgs.size(); ++i) | |
1390 | + delete lastArgs[i]; | |
1391 | + lastArgs.clear(); | |
1392 | + | |
1393 | + ValidatorMap::iterator it; | |
1394 | + for(it = validators.begin(); it != validators.end(); ++it) | |
1395 | + delete it->second; | |
1396 | + | |
1397 | + validators.clear(); | |
1398 | + optionGroupIds.clear(); | |
1399 | + groupValidators.clear(); | |
1400 | +}; | |
1401 | +/* ################################################################### */ | |
1402 | +void ezOptionParser::resetArgs() { | |
1403 | + int i; | |
1404 | + for(i=0; i < groups.size(); ++i) | |
1405 | + groups[i]->clearArgs(); | |
1406 | + | |
1407 | + for(i=0; i < unknownArgs.size(); ++i) | |
1408 | + delete unknownArgs[i]; | |
1409 | + unknownArgs.clear(); | |
1410 | + | |
1411 | + for(i=0; i < firstArgs.size(); ++i) | |
1412 | + delete firstArgs[i]; | |
1413 | + firstArgs.clear(); | |
1414 | + | |
1415 | + for(i=0; i < lastArgs.size(); ++i) | |
1416 | + delete lastArgs[i]; | |
1417 | + lastArgs.clear(); | |
1418 | +}; | |
1419 | +/* ################################################################### */ | |
1420 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, ezOptionValidator* validator) { | |
1421 | + int id = this->groups.size(); | |
1422 | + OptionGroup * g = new OptionGroup; | |
1423 | + g->defaults = defaults; | |
1424 | + g->isRequired = required; | |
1425 | + g->expectArgs = expectArgs; | |
1426 | + g->delim = delim; | |
1427 | + g->isSet = 0; | |
1428 | + g->help = help; | |
1429 | + std::string *f1 = new std::string(flag1); | |
1430 | + g->flags.push_back( f1 ); | |
1431 | + this->optionGroupIds[flag1] = id; | |
1432 | + this->groups.push_back(g); | |
1433 | + | |
1434 | + if (validator) { | |
1435 | + int vid = validator->id; | |
1436 | + validators[vid] = validator; | |
1437 | + groupValidators[id] = vid; | |
1438 | + } else { | |
1439 | + groupValidators[id] = -1; | |
1440 | + } | |
1441 | +}; | |
1442 | +/* ################################################################### */ | |
1443 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, ezOptionValidator* validator) { | |
1444 | + int id = this->groups.size(); | |
1445 | + OptionGroup * g = new OptionGroup; | |
1446 | + g->defaults = defaults; | |
1447 | + g->isRequired = required; | |
1448 | + g->expectArgs = expectArgs; | |
1449 | + g->delim = delim; | |
1450 | + g->isSet = 0; | |
1451 | + g->help = help; | |
1452 | + std::string *f1 = new std::string(flag1); | |
1453 | + g->flags.push_back( f1 ); | |
1454 | + std::string *f2 = new std::string(flag2); | |
1455 | + g->flags.push_back( f2 ); | |
1456 | + this->optionGroupIds[flag1] = id; | |
1457 | + this->optionGroupIds[flag2] = id; | |
1458 | + | |
1459 | + this->groups.push_back(g); | |
1460 | + | |
1461 | + if (validator) { | |
1462 | + int vid = validator->id; | |
1463 | + validators[vid] = validator; | |
1464 | + groupValidators[id] = vid; | |
1465 | + } else { | |
1466 | + groupValidators[id] = -1; | |
1467 | + } | |
1468 | +}; | |
1469 | +/* ################################################################### */ | |
1470 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, ezOptionValidator* validator) { | |
1471 | + int id = this->groups.size(); | |
1472 | + OptionGroup * g = new OptionGroup; | |
1473 | + g->defaults = defaults; | |
1474 | + g->isRequired = required; | |
1475 | + g->expectArgs = expectArgs; | |
1476 | + g->delim = delim; | |
1477 | + g->isSet = 0; | |
1478 | + g->help = help; | |
1479 | + std::string *f1 = new std::string(flag1); | |
1480 | + g->flags.push_back( f1 ); | |
1481 | + std::string *f2 = new std::string(flag2); | |
1482 | + g->flags.push_back( f2 ); | |
1483 | + std::string *f3 = new std::string(flag3); | |
1484 | + g->flags.push_back( f3 ); | |
1485 | + this->optionGroupIds[flag1] = id; | |
1486 | + this->optionGroupIds[flag2] = id; | |
1487 | + this->optionGroupIds[flag3] = id; | |
1488 | + | |
1489 | + this->groups.push_back(g); | |
1490 | + | |
1491 | + if (validator) { | |
1492 | + int vid = validator->id; | |
1493 | + validators[vid] = validator; | |
1494 | + groupValidators[id] = vid; | |
1495 | + } else { | |
1496 | + groupValidators[id] = -1; | |
1497 | + } | |
1498 | +}; | |
1499 | +/* ################################################################### */ | |
1500 | +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, const char * flag4, ezOptionValidator* validator) { | |
1501 | + int id = this->groups.size(); | |
1502 | + OptionGroup * g = new OptionGroup; | |
1503 | + g->defaults = defaults; | |
1504 | + g->isRequired = required; | |
1505 | + g->expectArgs = expectArgs; | |
1506 | + g->delim = delim; | |
1507 | + g->isSet = 0; | |
1508 | + g->help = help; | |
1509 | + std::string *f1 = new std::string(flag1); | |
1510 | + g->flags.push_back( f1 ); | |
1511 | + std::string *f2 = new std::string(flag2); | |
1512 | + g->flags.push_back( f2 ); | |
1513 | + std::string *f3 = new std::string(flag3); | |
1514 | + g->flags.push_back( f3 ); | |
1515 | + std::string *f4 = new std::string(flag4); | |
1516 | + g->flags.push_back( f4 ); | |
1517 | + this->optionGroupIds[flag1] = id; | |
1518 | + this->optionGroupIds[flag2] = id; | |
1519 | + this->optionGroupIds[flag3] = id; | |
1520 | + this->optionGroupIds[flag4] = id; | |
1521 | + | |
1522 | + this->groups.push_back(g); | |
1523 | + | |
1524 | + if (validator) { | |
1525 | + int vid = validator->id; | |
1526 | + validators[vid] = validator; | |
1527 | + groupValidators[id] = vid; | |
1528 | + } else { | |
1529 | + groupValidators[id] = -1; | |
1530 | + } | |
1531 | +}; | |
1532 | +/* ################################################################### */ | |
1533 | +bool ezOptionParser::exportFile(const char * filename, bool all) { | |
1534 | + int i; | |
1535 | + std::string out; | |
1536 | + bool quote; | |
1537 | + | |
1538 | + // Export the first args, except the program name, so start from 1. | |
1539 | + for(i=1; i < firstArgs.size(); ++i) { | |
1540 | + quote = ((firstArgs[i]->find_first_of(" \t") != std::string::npos) && (firstArgs[i]->find_first_of("\'\"") == std::string::npos)); | |
1541 | + | |
1542 | + if (quote) | |
1543 | + out.append("\""); | |
1544 | + | |
1545 | + out.append(*firstArgs[i]); | |
1546 | + if (quote) | |
1547 | + out.append("\""); | |
1548 | + | |
1549 | + out.append(" "); | |
1550 | + } | |
1551 | + | |
1552 | + if (firstArgs.size() > 1) | |
1553 | + out.append("\n"); | |
1554 | + | |
1555 | + std::vector<std::string* > stringPtrs(groups.size()); | |
1556 | + int j,m; | |
1557 | + int n = groups.size(); | |
1558 | + for(i=0; i < n; ++i) { | |
1559 | + stringPtrs[i] = groups[i]->flags[0]; | |
1560 | + } | |
1561 | + | |
1562 | + OptionGroup *g; | |
1563 | + // Sort first flag of each group with other groups. | |
1564 | + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr); | |
1565 | + for(i=0; i < n; ++i) { | |
1566 | + g = get(stringPtrs[i]->c_str()); | |
1567 | + if (g->isSet || all) { | |
1568 | + if (!g->isSet || g->args.empty()) { | |
1569 | + if (!g->defaults.empty()) { | |
1570 | + out.append(*stringPtrs[i]); | |
1571 | + out.append(" "); | |
1572 | + quote = ((g->defaults.find_first_of(" \t") != std::string::npos) && (g->defaults.find_first_of("\'\"") == std::string::npos)); | |
1573 | + if (quote) | |
1574 | + out.append("\""); | |
1575 | + | |
1576 | + out.append(g->defaults); | |
1577 | + if (quote) | |
1578 | + out.append("\""); | |
1579 | + | |
1580 | + out.append("\n"); | |
1581 | + } | |
1582 | + } else { | |
1583 | + int n = g->args.size(); | |
1584 | + for(int j=0; j < n; ++j) { | |
1585 | + out.append(*stringPtrs[i]); | |
1586 | + out.append(" "); | |
1587 | + m = g->args[j]->size(); | |
1588 | + | |
1589 | + for(int k=0; k < m; ++k) { | |
1590 | + quote = ( (*g->args[j]->at(k)).find_first_of(" \t") != std::string::npos ); | |
1591 | + if (quote) | |
1592 | + out.append("\""); | |
1593 | + | |
1594 | + out.append(*g->args[j]->at(k)); | |
1595 | + if (quote) | |
1596 | + out.append("\""); | |
1597 | + | |
1598 | + if ((g->delim) && ((k+1) != m)) | |
1599 | + out.append(1,g->delim); | |
1600 | + } | |
1601 | + out.append("\n"); | |
1602 | + } | |
1603 | + } | |
1604 | + } | |
1605 | + } | |
1606 | + | |
1607 | + // Export the last args. | |
1608 | + for(i=0; i < lastArgs.size(); ++i) { | |
1609 | + quote = ( lastArgs[i]->find_first_of(" \t") != std::string::npos ); | |
1610 | + if (quote) | |
1611 | + out.append("\""); | |
1612 | + | |
1613 | + out.append(*lastArgs[i]); | |
1614 | + if (quote) | |
1615 | + out.append("\""); | |
1616 | + | |
1617 | + out.append(" "); | |
1618 | + } | |
1619 | + | |
1620 | + std::ofstream file(filename); | |
1621 | + if (!file.is_open()) | |
1622 | + return false; | |
1623 | + | |
1624 | + file << out; | |
1625 | + file.close(); | |
1626 | + | |
1627 | + return true; | |
1628 | +}; | |
1629 | +/* ################################################################### */ | |
1630 | +// Does not overwrite current options. | |
1631 | +// Returns true if file was read successfully. | |
1632 | +// So if this is used before parsing CLI, then option values will reflect | |
1633 | +// this file, but if used after parsing CLI, then values will contain | |
1634 | +// both CLI values and file's values. | |
1635 | +// | |
1636 | +// Comment lines are allowed if prefixed with #. | |
1637 | +// Strings should be quoted as usual. | |
1638 | +bool ezOptionParser::importFile(const char * filename, char comment) { | |
1639 | + std::ifstream file (filename, std::ios::in | std::ios::ate); | |
1640 | + if (!file.is_open()) | |
1641 | + return false; | |
1642 | + | |
1643 | + // Read entire file contents. | |
1644 | + std::ifstream::pos_type size = file.tellg(); | |
1645 | + char * memblock = new char[(int)size+1]; // Add one for end of string. | |
1646 | + file.seekg (0, std::ios::beg); | |
1647 | + file.read (memblock, size); | |
1648 | + memblock[size] = '\0'; | |
1649 | + file.close(); | |
1650 | + | |
1651 | + // Find comment lines. | |
1652 | + std::list<std::string*> lines; | |
1653 | + std::string memblockstring(memblock); | |
1654 | + delete[] memblock; | |
1655 | + SplitDelim(memblockstring, '\n', lines); | |
1656 | + int i,j,n; | |
1657 | + std::list<std::string*>::iterator iter; | |
1658 | + std::vector<int> sq, dq; // Single and double quote indices. | |
1659 | + std::vector<int>::iterator lo; // For searching quote indices. | |
1660 | + size_t pos; | |
1661 | + const char *str; | |
1662 | + std::string *line; | |
1663 | + // Find all single and double quotes to correctly handle comment tokens. | |
1664 | + for(iter=lines.begin(); iter != lines.end(); ++iter) { | |
1665 | + line = *iter; | |
1666 | + str = line->c_str(); | |
1667 | + n = line->size(); | |
1668 | + sq.clear(); | |
1669 | + dq.clear(); | |
1670 | + if (n) { | |
1671 | + // If first char is comment, then erase line and continue. | |
1672 | + pos = line->find_first_not_of(" \t\r"); | |
1673 | + if ((pos==std::string::npos) || (line->at(pos)==comment)) { | |
1674 | + line->erase(); | |
1675 | + continue; | |
1676 | + } else { | |
1677 | + // Erase whitespace prefix. | |
1678 | + line->erase(0,pos); | |
1679 | + n = line->size(); | |
1680 | + } | |
1681 | + | |
1682 | + if (line->at(0)=='"') | |
1683 | + dq.push_back(0); | |
1684 | + | |
1685 | + if (line->at(0)=='\'') | |
1686 | + sq.push_back(0); | |
1687 | + } else { // Empty line. | |
1688 | + continue; | |
1689 | + } | |
1690 | + | |
1691 | + for(i=1; i < n; ++i) { | |
1692 | + if ( (str[i]=='"') && (str[i-1]!='\\') ) | |
1693 | + dq.push_back(i); | |
1694 | + else if ( (str[i]=='\'') && (str[i-1]!='\\') ) | |
1695 | + sq.push_back(i); | |
1696 | + } | |
1697 | + // Scan for comments, and when found, check bounds of quotes. | |
1698 | + // Start with second char because already checked first char. | |
1699 | + for(i=1; i < n; ++i) { | |
1700 | + if ( (line->at(i)==comment) && (line->at(i-1)!='\\') ) { | |
1701 | + // If within open/close quote pair, then not real comment. | |
1702 | + if (sq.size()) { | |
1703 | + lo = std::lower_bound(sq.begin(), sq.end(), i); | |
1704 | + // All start of strings will be even indices, closing quotes is odd indices. | |
1705 | + j = (int)(lo-sq.begin()); | |
1706 | + if ( (j % 2) == 0) { // Even implies comment char not in quote pair. | |
1707 | + // Erase from comment char to end of line. | |
1708 | + line->erase(i); | |
1709 | + break; | |
1710 | + } | |
1711 | + } else if (dq.size()) { | |
1712 | + // Repeat tests for double quotes. | |
1713 | + lo = std::lower_bound(dq.begin(), dq.end(), i); | |
1714 | + j = (int)(lo-dq.begin()); | |
1715 | + if ( (j % 2) == 0) { | |
1716 | + line->erase(i); | |
1717 | + break; | |
1718 | + } | |
1719 | + } else { | |
1720 | + // Not in quotes. | |
1721 | + line->erase(i); | |
1722 | + break; | |
1723 | + } | |
1724 | + } | |
1725 | + } | |
1726 | + } | |
1727 | + | |
1728 | + std::string cmd; | |
1729 | + // Convert list to string without newlines to simulate commandline. | |
1730 | + for(iter=lines.begin(); iter != lines.end(); ++iter) { | |
1731 | + if (! (*iter)->empty()) { | |
1732 | + cmd.append(**iter); | |
1733 | + cmd.append(" "); | |
1734 | + } | |
1735 | + } | |
1736 | + | |
1737 | + // Now parse as if from command line. | |
1738 | + int argc=0; | |
1739 | + char** argv = CommandLineToArgvA((char*)cmd.c_str(), &argc); | |
1740 | + | |
1741 | + // Parse. | |
1742 | + parse(argc, (const char**)argv); | |
1743 | + if (argv) free(argv); | |
1744 | + for(iter=lines.begin(); iter != lines.end(); ++iter) | |
1745 | + delete *iter; | |
1746 | + | |
1747 | + return true; | |
1748 | +}; | |
1749 | +/* ################################################################### */ | |
1750 | +int ezOptionParser::isSet(const char * name) { | |
1751 | + std::string sname(name); | |
1752 | + | |
1753 | + if (this->optionGroupIds.count(sname)) { | |
1754 | + return this->groups[this->optionGroupIds[sname]]->isSet; | |
1755 | + } | |
1756 | + | |
1757 | + return 0; | |
1758 | +}; | |
1759 | +/* ################################################################### */ | |
1760 | +int ezOptionParser::isSet(std::string & name) { | |
1761 | + if (this->optionGroupIds.count(name)) { | |
1762 | + return this->groups[this->optionGroupIds[name]]->isSet; | |
1763 | + } | |
1764 | + | |
1765 | + return 0; | |
1766 | +}; | |
1767 | +/* ################################################################### */ | |
1768 | +OptionGroup * ezOptionParser::get(const char * name) { | |
1769 | + if (optionGroupIds.count(name)) { | |
1770 | + return groups[optionGroupIds[name]]; | |
1771 | + } | |
1772 | + | |
1773 | + return 0; | |
1774 | +}; | |
1775 | +/* ################################################################### */ | |
1776 | +void ezOptionParser::getUsage(std::string & usage, int width, Layout layout) { | |
1777 | + | |
1778 | + usage.append(overview); | |
1779 | + usage.append("\n\n"); | |
1780 | + usage.append("USAGE: "); | |
1781 | + usage.append(syntax); | |
1782 | + usage.append("\n\nOPTIONS:\n\n"); | |
1783 | + getUsageDescriptions(usage, width, layout); | |
1784 | + | |
1785 | + if (!example.empty()) { | |
1786 | + usage.append("EXAMPLES:\n\n"); | |
1787 | + usage.append(example); | |
1788 | + } | |
1789 | + | |
1790 | + if (!footer.empty()) { | |
1791 | + usage.append(footer); | |
1792 | + } | |
1793 | +}; | |
1794 | +/* ################################################################### */ | |
1795 | +// Creates 2 column formatted help descriptions for each option flag. | |
1796 | +void ezOptionParser::getUsageDescriptions(std::string & usage, int width, Layout layout) { | |
1797 | + // Sort each flag list amongst each group. | |
1798 | + int i; | |
1799 | + // Store index of flag groups before sort for easy lookup later. | |
1800 | + std::map<std::string*, int> stringPtrToIndexMap; | |
1801 | + std::vector<std::string* > stringPtrs(groups.size()); | |
1802 | + | |
1803 | + for(i=0; i < groups.size(); ++i) { | |
1804 | + std::sort(groups[i]->flags.begin(), groups[i]->flags.end(), CmpOptStringPtr); | |
1805 | + stringPtrToIndexMap[groups[i]->flags[0]] = i; | |
1806 | + stringPtrs[i] = groups[i]->flags[0]; | |
1807 | + } | |
1808 | + | |
1809 | + size_t j, k, n; | |
1810 | + std::string opts; | |
1811 | + std::vector<std::string> sortedOpts; | |
1812 | + // Sort first flag of each group with other groups. | |
1813 | + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr); | |
1814 | + for(i=0; i < groups.size(); ++i) { | |
1815 | + //printf("DEBUG:%d: %d %d %s\n", __LINE__, i, stringPtrToIndexMap[stringPtrs[i]], stringPtrs[i]->c_str()); | |
1816 | + k = stringPtrToIndexMap[stringPtrs[i]]; | |
1817 | + opts.clear(); | |
1818 | + for(j=0; j < groups[k]->flags.size()-1; ++j) { | |
1819 | + opts.append(*groups[k]->flags[j]); | |
1820 | + opts.append(", "); | |
1821 | + | |
1822 | + if (opts.size() > width) | |
1823 | + opts.append("\n"); | |
1824 | + } | |
1825 | + // The last flag. No need to append comma anymore. | |
1826 | + opts.append( *groups[k]->flags[j] ); | |
1827 | + | |
1828 | + if (groups[k]->expectArgs) { | |
1829 | + opts.append(" ARG"); | |
1830 | + | |
1831 | + if (groups[k]->delim) { | |
1832 | + opts.append("1["); | |
1833 | + opts.append(1, groups[k]->delim); | |
1834 | + opts.append("ARGn]"); | |
1835 | + } | |
1836 | + } | |
1837 | + | |
1838 | + sortedOpts.push_back(opts); | |
1839 | + } | |
1840 | + | |
1841 | + // Each option group will use this to build multiline help description. | |
1842 | + std::list<std::string*> desc; | |
1843 | + // Number of whitespaces from start of line to description (interleave layout) or | |
1844 | + // gap between flag names and description (align, stagger layouts). | |
1845 | + int gutter = 3; | |
1846 | + | |
1847 | + // Find longest opt flag string to set column start for help usage descriptions. | |
1848 | + int maxlen=0; | |
1849 | + if (layout == ALIGN) { | |
1850 | + for(i=0; i < groups.size(); ++i) { | |
1851 | + if (maxlen < sortedOpts[i].size()) | |
1852 | + maxlen = sortedOpts[i].size(); | |
1853 | + } | |
1854 | + } | |
1855 | + | |
1856 | + // The amount of space remaining on a line for help text after flags. | |
1857 | + int helpwidth; | |
1858 | + std::list<std::string*>::iterator cIter, insertionIter; | |
1859 | + size_t pos; | |
1860 | + for(i=0; i < groups.size(); ++i) { | |
1861 | + k = stringPtrToIndexMap[stringPtrs[i]]; | |
1862 | + | |
1863 | + if (layout == STAGGER) | |
1864 | + maxlen = sortedOpts[i].size(); | |
1865 | + | |
1866 | + int pad = gutter + maxlen; | |
1867 | + helpwidth = width - pad; | |
1868 | + | |
1869 | + // All the following split-fu could be optimized by just using substring (offset, length) tuples, but just to get it done, we'll do some not-too expensive string copying. | |
1870 | + SplitDelim(groups[k]->help, '\n', desc); | |
1871 | + // Split lines longer than allowable help width. | |
1872 | + for(insertionIter=desc.begin(), cIter=insertionIter++; | |
1873 | + cIter != desc.end(); | |
1874 | + cIter=insertionIter++) { | |
1875 | + if ((*cIter)->size() > helpwidth) { | |
1876 | + // Get pointer to next string to insert new strings before it. | |
1877 | + std::string *rem = *cIter; | |
1878 | + // Remove this line and add back in pieces. | |
1879 | + desc.erase(cIter); | |
1880 | + // Loop until remaining string is short enough. | |
1881 | + while (rem->size() > helpwidth) { | |
1882 | + // Find whitespace to split before helpwidth. | |
1883 | + if (rem->at(helpwidth) == ' ') { | |
1884 | + // If word ends exactly at helpwidth, then split after it. | |
1885 | + pos = helpwidth; | |
1886 | + } else { | |
1887 | + // Otherwise, split occurs midword, so find whitespace before this word. | |
1888 | + pos = rem->rfind(" ", helpwidth); | |
1889 | + } | |
1890 | + // Insert split string. | |
1891 | + desc.insert(insertionIter, new std::string(*rem, 0, pos)); | |
1892 | + // Now skip any whitespace to start new line. | |
1893 | + pos = rem->find_first_not_of(' ', pos); | |
1894 | + rem->erase(0, pos); | |
1895 | + } | |
1896 | + | |
1897 | + if (rem->size()) | |
1898 | + desc.insert(insertionIter, rem); | |
1899 | + else | |
1900 | + delete rem; | |
1901 | + } | |
1902 | + } | |
1903 | + | |
1904 | + usage.append(sortedOpts[i]); | |
1905 | + if (layout != INTERLEAVE) | |
1906 | + // Add whitespace between option names and description. | |
1907 | + usage.append(pad - sortedOpts[i].size(), ' '); | |
1908 | + else { | |
1909 | + usage.append("\n"); | |
1910 | + usage.append(gutter, ' '); | |
1911 | + } | |
1912 | + | |
1913 | + // First line already padded above (before calling SplitDelim) after option flag names. | |
1914 | + cIter = desc.begin(); | |
1915 | + usage.append(**cIter); | |
1916 | + usage.append("\n"); | |
1917 | + // Now inject the pad for each line. | |
1918 | + for(++cIter; cIter != desc.end(); ++cIter) { | |
1919 | + usage.append(pad, ' '); | |
1920 | + usage.append(**cIter); | |
1921 | + usage.append("\n"); | |
1922 | + } | |
1923 | + | |
1924 | + if (this->doublespace) usage.append("\n"); | |
1925 | + | |
1926 | + if (desc.size()) { | |
1927 | + for(cIter=desc.begin(); cIter != desc.end(); ++cIter) | |
1928 | + delete *cIter; | |
1929 | + | |
1930 | + desc.clear(); | |
1931 | + } | |
1932 | + } | |
1933 | +}; | |
1934 | +/* ################################################################### */ | |
1935 | +bool ezOptionParser::gotExpected(std::vector<std::string> & badOptions) { | |
1936 | + int i,j; | |
1937 | + | |
1938 | + for(i=0; i < groups.size(); ++i) { | |
1939 | + OptionGroup *g = groups[i]; | |
1940 | + // If was set, ensure number of args is correct. | |
1941 | + if (g->isSet) { | |
1942 | + if ((g->expectArgs != 0) && g->args.empty()) { | |
1943 | + badOptions.push_back(*g->flags[0]); | |
1944 | + continue; | |
1945 | + } | |
1946 | + | |
1947 | + for(j=0; j < g->args.size(); ++j) { | |
1948 | + if ((g->expectArgs != -1) && (g->expectArgs != g->args[j]->size())) | |
1949 | + badOptions.push_back(*g->flags[0]); | |
1950 | + } | |
1951 | + } | |
1952 | + } | |
1953 | + | |
1954 | + return badOptions.empty(); | |
1955 | +}; | |
1956 | +/* ################################################################### */ | |
1957 | +bool ezOptionParser::gotRequired(std::vector<std::string> & badOptions) { | |
1958 | + int i; | |
1959 | + | |
1960 | + for(i=0; i < groups.size(); ++i) { | |
1961 | + OptionGroup *g = groups[i]; | |
1962 | + // Simple case when required but user never set it. | |
1963 | + if (g->isRequired && (!g->isSet)) { | |
1964 | + badOptions.push_back(*g->flags[0]); | |
1965 | + continue; | |
1966 | + } | |
1967 | + } | |
1968 | + | |
1969 | + return badOptions.empty(); | |
1970 | +}; | |
1971 | +/* ################################################################### */ | |
1972 | +bool ezOptionParser::gotValid(std::vector<std::string> & badOptions, std::vector<std::string> & badArgs) { | |
1973 | + int groupid, validatorid; | |
1974 | + std::map< int, int >::iterator it; | |
1975 | + | |
1976 | + for(it = groupValidators.begin(); it != groupValidators.end(); ++it) { | |
1977 | + groupid = it->first; | |
1978 | + validatorid = it->second; | |
1979 | + if (validatorid < 0) continue; | |
1980 | + | |
1981 | + OptionGroup *g = groups[groupid]; | |
1982 | + ezOptionValidator *v = validators[validatorid]; | |
1983 | + bool nextgroup = false; | |
1984 | + | |
1985 | + for (int i = 0; i < g->args.size(); ++i) { | |
1986 | + if (nextgroup) break; | |
1987 | + std::vector< std::string* > * args = g->args[i]; | |
1988 | + for (int j = 0; j < args->size(); ++j) { | |
1989 | + if (!v->isValid(args->at(j))) { | |
1990 | + badOptions.push_back(*g->flags[0]); | |
1991 | + badArgs.push_back(*args->at(j)); | |
1992 | + nextgroup = true; | |
1993 | + break; | |
1994 | + } | |
1995 | + } | |
1996 | + } | |
1997 | + } | |
1998 | + | |
1999 | + return badOptions.empty(); | |
2000 | +}; | |
2001 | +/* ################################################################### */ | |
2002 | +void ezOptionParser::parse(int argc, const char * argv[]) { | |
2003 | + if (argc < 1) return; | |
2004 | + | |
2005 | + /* | |
2006 | + std::map<std::string,int>::iterator it; | |
2007 | + for ( it=optionGroupIds.begin() ; it != optionGroupIds.end(); it++ ) | |
2008 | + std::cout << (*it).first << " => " << (*it).second << std::endl; | |
2009 | + */ | |
2010 | + | |
2011 | + int found=0, i, k, firstOptIndex=0, lastOptIndex=0; | |
2012 | + std::string s; | |
2013 | + OptionGroup *g; | |
2014 | + | |
2015 | + for(i=0; i < argc; ++i) { | |
2016 | + s = argv[i]; | |
2017 | + | |
2018 | + if (optionGroupIds.count(s)) | |
2019 | + break; | |
2020 | + } | |
2021 | + | |
2022 | + firstOptIndex = i; | |
2023 | + | |
2024 | + if (firstOptIndex == argc) { | |
2025 | + // No flags encountered, so set last args. | |
2026 | + this->firstArgs.push_back(new std::string(argv[0])); | |
2027 | + | |
2028 | + for(k=1; k < argc; ++k) | |
2029 | + this->lastArgs.push_back(new std::string(argv[k])); | |
2030 | + | |
2031 | + return; | |
2032 | + } | |
2033 | + | |
2034 | + // Store initial args before opts appear. | |
2035 | + for(k=0; k < i; ++k) { | |
2036 | + this->firstArgs.push_back(new std::string(argv[k])); | |
2037 | + } | |
2038 | + | |
2039 | + for(; i < argc; ++i) { | |
2040 | + s = argv[i]; | |
2041 | + | |
2042 | + if (optionGroupIds.count(s)) { | |
2043 | + k = optionGroupIds[s]; | |
2044 | + g = groups[k]; | |
2045 | + g->isSet = 1; | |
2046 | + g->parseIndex.push_back(i); | |
2047 | + | |
2048 | + if (g->expectArgs) { | |
2049 | + // Read ahead to get args. | |
2050 | + ++i; | |
2051 | + if (i >= argc) return; | |
2052 | + g->args.push_back(new std::vector<std::string*>); | |
2053 | + SplitDelim(argv[i], g->delim, g->args.back()); | |
2054 | + } | |
2055 | + lastOptIndex = i; | |
2056 | + } | |
2057 | + } | |
2058 | + | |
2059 | + // Scan for unknown opts/arguments. | |
2060 | + for(i=firstOptIndex; i <= lastOptIndex; ++i) { | |
2061 | + s = argv[i]; | |
2062 | + | |
2063 | + if (optionGroupIds.count(s)) { | |
2064 | + k = optionGroupIds[s]; | |
2065 | + g = groups[k]; | |
2066 | + if (g->expectArgs) { | |
2067 | + // Read ahead for args and skip them. | |
2068 | + ++i; | |
2069 | + } | |
2070 | + } else { | |
2071 | + unknownArgs.push_back(new std::string(argv[i])); | |
2072 | + } | |
2073 | + } | |
2074 | + | |
2075 | + if ( lastOptIndex >= (argc-1) ) return; | |
2076 | + | |
2077 | + // Store final args without flags. | |
2078 | + for(k=lastOptIndex + 1; k < argc; ++k) { | |
2079 | + this->lastArgs.push_back(new std::string(argv[k])); | |
2080 | + } | |
2081 | +}; | |
2082 | +/* ################################################################### */ | |
2083 | +void ezOptionParser::prettyPrint(std::string & out) { | |
2084 | + char tmp[256]; | |
2085 | + int i,j,k; | |
2086 | + | |
2087 | + out += "First Args:\n"; | |
2088 | + for(i=0; i < firstArgs.size(); ++i) { | |
2089 | + sprintf(tmp, "%d: %s\n", i+1, firstArgs[i]->c_str()); | |
2090 | + out += tmp; | |
2091 | + } | |
2092 | + | |
2093 | + // Sort the option flag names. | |
2094 | + int n = groups.size(); | |
2095 | + std::vector<std::string* > stringPtrs(n); | |
2096 | + for(i=0; i < n; ++i) { | |
2097 | + stringPtrs[i] = groups[i]->flags[0]; | |
2098 | + } | |
2099 | + | |
2100 | + // Sort first flag of each group with other groups. | |
2101 | + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr); | |
2102 | + | |
2103 | + out += "\nOptions:\n"; | |
2104 | + OptionGroup *g; | |
2105 | + for(i=0; i < n; ++i) { | |
2106 | + g = get(stringPtrs[i]->c_str()); | |
2107 | + out += "\n"; | |
2108 | + // The flag names: | |
2109 | + for(j=0; j < g->flags.size()-1; ++j) { | |
2110 | + sprintf(tmp, "%s, ", g->flags[j]->c_str()); | |
2111 | + out += tmp; | |
2112 | + } | |
2113 | + sprintf(tmp, "%s:\n", g->flags.back()->c_str()); | |
2114 | + out += tmp; | |
2115 | + | |
2116 | + if (g->isSet) { | |
2117 | + if (g->expectArgs) { | |
2118 | + if (g->args.empty()) { | |
2119 | + sprintf(tmp, "%s (default)\n", g->defaults.c_str()); | |
2120 | + out += tmp; | |
2121 | + } else { | |
2122 | + for(k=0; k < g->args.size(); ++k) { | |
2123 | + for(j=0; j < g->args[k]->size()-1; ++j) { | |
2124 | + sprintf(tmp, "%s%c", g->args[k]->at(j)->c_str(), g->delim); | |
2125 | + out += tmp; | |
2126 | + } | |
2127 | + sprintf(tmp, "%s\n", g->args[k]->back()->c_str(), g->delim); | |
2128 | + out += tmp; | |
2129 | + } | |
2130 | + } | |
2131 | + } else { // Set but no args expected. | |
2132 | + sprintf(tmp, "Set\n"); | |
2133 | + out += tmp; | |
2134 | + } | |
2135 | + } else { | |
2136 | + sprintf(tmp, "Not set\n"); | |
2137 | + out += tmp; | |
2138 | + } | |
2139 | + } | |
2140 | + | |
2141 | + out += "\nLast Args:\n"; | |
2142 | + for(i=0; i < lastArgs.size(); ++i) { | |
2143 | + sprintf(tmp, "%d: %s\n", i+1, lastArgs[i]->c_str()); | |
2144 | + out += tmp; | |
2145 | + } | |
2146 | + | |
2147 | + out += "\nUnknown Args:\n"; | |
2148 | + for(i=0; i < unknownArgs.size(); ++i) { | |
2149 | + sprintf(tmp, "%d: %s\n", i+1, unknownArgs[i]->c_str()); | |
2150 | + out += tmp; | |
2151 | + } | |
2152 | +}; | |
2153 | +} | |
2154 | +/* ################################################################### */ | |
2155 | +#endif /* EZ_OPTION_PARSER_H */ | |
... | ... |
morfeusz/java/pl/waw/ipipan/morfeusz/MorfeuszProcessorType.java
0 → 100644
1 | +/* ---------------------------------------------------------------------------- | |
2 | + * This file was automatically generated by SWIG (http://www.swig.org). | |
3 | + * Version 2.0.10 | |
4 | + * | |
5 | + * Do not make changes to this file unless you know what you are doing--modify | |
6 | + * the SWIG interface file instead. | |
7 | + * ----------------------------------------------------------------------------- */ | |
8 | + | |
9 | +package pl.waw.ipipan.morfeusz; | |
10 | + | |
/**
 * Processor kinds exposed by the SWIG-generated binding. Each constant
 * carries the integer value used to identify it across the native boundary;
 * values are handed out sequentially starting at 0 unless a constructor sets
 * one explicitly.
 */
public enum MorfeuszProcessorType {
  GENERATOR,
  ANALYZER;

  /** Integer value assigned to this constant. */
  private final int swigValue;

  /** Returns the integer value assigned to this constant. */
  public final int swigValue() {
    return swigValue;
  }

  /**
   * Maps an integer value back to its constant.
   *
   * The constant at index {@code swigValue} is tried first; if it does not
   * carry that value, all constants are searched.
   *
   * @throws IllegalArgumentException when no constant carries the value.
   */
  public static MorfeuszProcessorType swigToEnum(int swigValue) {
    MorfeuszProcessorType[] candidates = MorfeuszProcessorType.class.getEnumConstants();

    boolean usableAsIndex = swigValue >= 0 && swigValue < candidates.length;
    if (usableAsIndex && candidates[swigValue].swigValue == swigValue) {
      return candidates[swigValue];
    }

    for (int idx = 0; idx < candidates.length; idx++) {
      if (candidates[idx].swigValue == swigValue) {
        return candidates[idx];
      }
    }
    throw new IllegalArgumentException("No enum " + MorfeuszProcessorType.class + " with value " + swigValue);
  }

  /** Takes the next sequential value. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType() {
    this.swigValue = SwigNext.next++;
  }

  /** Takes an explicit value; numbering continues right after it. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType(int swigValue) {
    this.swigValue = swigValue;
    SwigNext.next = swigValue+1;
  }

  /** Copies the value of another constant; numbering continues right after it. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType(MorfeuszProcessorType swigEnum) {
    this.swigValue = swigEnum.swigValue;
    SwigNext.next = this.swigValue+1;
  }

  /** Holder for the next sequential value handed out by the constructors. */
  private static class SwigNext {
    private static int next = 0;
  }
}
52 | + | |
... | ... |
morfeusz/morfeusz_analyzer.cpp
... | ... | @@ -8,18 +8,96 @@ |
8 | 8 | #include <cstdlib> |
9 | 9 | #include <iostream> |
10 | 10 | #include <vector> |
11 | +#include <map> | |
11 | 12 | #include "fsa/fsa.hpp" |
12 | 13 | #include "Tagset.hpp" |
13 | 14 | #include "Morfeusz.hpp" |
14 | 15 | #include "const.hpp" |
15 | 16 | |
17 | +#include "cli/cli.hpp" | |
18 | + | |
16 | 19 | using namespace std; |
20 | +using namespace ez; | |
21 | + | |
22 | +int main(int argc, const char** argv) { | |
23 | + | |
24 | + ezOptionParser opt; | |
25 | + | |
26 | + opt.overview = "Morfeusz analyzer"; | |
27 | + opt.syntax = string(argv[0]) + " [OPTIONS]"; | |
28 | + opt.example = string(argv[0]) + " --aggl strict --praet split --input /path/to/file.fsa"; | |
29 | + // opt.footer = "Morfeusz Copyright (C) 2014\n"; | |
30 | + | |
31 | + opt.add( | |
32 | + "", // Default. | |
33 | + 0, // Required? | |
34 | + 0, // Number of args expected. | |
35 | + 0, // Delimiter if expecting multiple args. | |
36 | + "Display usage instructions.", // Help description. | |
37 | + "-h", // Flag token. | |
38 | + "-help", // Flag token. | |
39 | + "--help", // Flag token. | |
40 | + "--usage" // Flag token. | |
41 | + ); | |
42 | + | |
43 | + opt.add( | |
44 | + "", // Default. | |
45 | + 0, // Required? | |
46 | + 1, // Number of args expected. | |
47 | + 0, // Delimiter if expecting multiple args. | |
48 | + "file with analyzer finite state automaton and data, created with buildfsa.py script.", // Help description. | |
49 | + "-i", // Flag token. | |
50 | + "-input", // Flag token. | |
51 | + "--input" // Flag token. | |
52 | + ); | |
53 | + | |
54 | + opt.add( | |
55 | + "", // Default. | |
56 | + 0, // Required? | |
57 | + 1, // Number of args expected. | |
58 | + 0, // Delimiter if expecting multiple args. | |
59 | + "aggl option.", // Help description. | |
60 | + "-a", // Flag token. | |
61 | + "-aggl", // Flag token. | |
62 | + "--aggl" // Flag token. | |
63 | + ); | |
64 | + | |
65 | + opt.add( | |
66 | + "", // Default. | |
67 | + 0, // Required? | |
68 | + 1, // Number of args expected. | |
69 | + 0, // Delimiter if expecting multiple args. | |
70 | + "praet option.", // Help description. | |
71 | + "-p", // Flag token. | |
72 | + "-praet", // Flag token. | |
73 | + "--praet" // Flag token. | |
74 | + ); | |
75 | + | |
76 | + opt.parse(argc, argv); | |
77 | + | |
78 | + if (opt.isSet("-h")) { | |
79 | + printCLIUsage(opt, cout); | |
80 | + return 0; | |
81 | + } | |
17 | 82 | |
18 | -int main(int argc, char** argv) { | |
19 | 83 | Morfeusz morfeusz; |
20 | - if (argc > 1) { | |
21 | - morfeusz.setAnalyzerFile(argv[1]); | |
22 | - printf("Using dictionary from %s\n", argv[1]); | |
84 | + if (opt.isSet("-i")) { | |
85 | + string analyzerFile; | |
86 | + opt.get("-i")->getString(analyzerFile); | |
87 | + morfeusz.setAnalyzerFile(analyzerFile); | |
88 | + printf("Using dictionary from %s\n", analyzerFile.c_str()); | |
89 | + } | |
90 | + if (opt.isSet("-a")) { | |
91 | + string aggl; | |
92 | + opt.get("-a")->getString(aggl); | |
93 | + cerr << "setting aggl option to " << aggl << endl; | |
94 | + morfeusz.setAggl(aggl); | |
95 | + } | |
96 | + if (opt.isSet("-p")) { | |
97 | + string praet; | |
98 | + opt.get("-p")->getString(praet); | |
99 | + cerr << "setting praet option to " << praet << endl; | |
100 | + morfeusz.setPraet(praet); | |
23 | 101 | } |
24 | 102 | #ifdef _WIN32 |
25 | 103 | morfeusz.setCharset(CP852); |
... | ... | @@ -37,10 +115,11 @@ int main(int argc, char** argv) { |
37 | 115 | printf("["); |
38 | 116 | for (unsigned int i = 0; i < res.size(); i++) { |
39 | 117 | MorphInterpretation& mi = res[i]; |
40 | - if (prevStart != -1 | |
118 | + if (prevStart != -1 | |
41 | 119 | && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) { |
42 | 120 | printf("]\n["); |
43 | - } else if (prevStart != -1) { | |
121 | + } | |
122 | + else if (prevStart != -1) { | |
44 | 123 | printf("; "); |
45 | 124 | } |
46 | 125 | printf("%d,%d,%s,%s,%s,%s", |
... | ... |
morfeusz/segrules/segrules.cpp
... | ... | @@ -28,8 +28,12 @@ static inline const unsigned char* getFSAsMapPtr(const unsigned char* ptr) { |
28 | 28 | |
29 | 29 | static inline SegrulesOptions deserializeOptions(const unsigned char*& ptr) { |
30 | 30 | SegrulesOptions res; |
31 | - res["aggl"] = deserializeString(ptr); | |
32 | - res["praet"] = deserializeString(ptr); | |
31 | + unsigned char optsNum = *ptr; | |
32 | + ptr++; | |
33 | + for (unsigned char i = 0; i < optsNum; i++) { | |
34 | + string key = deserializeString(ptr); | |
35 | + res[key] = deserializeString(ptr); | |
36 | + } | |
33 | 37 | return res; |
34 | 38 | } |
35 | 39 | |
... | ... | @@ -54,3 +58,35 @@ map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* an |
54 | 58 | } |
55 | 59 | return res; |
56 | 60 | } |
61 | + | |
62 | +SegrulesOptions getDefaultSegrulesOptions(const unsigned char* ptr) { | |
63 | + const unsigned char* fsasMapPtr = getFSAsMapPtr(ptr); | |
64 | + const unsigned char* currPtr = fsasMapPtr; | |
65 | + unsigned char fsasNum = *currPtr; | |
66 | + currPtr++; | |
67 | + for (unsigned char i = 0; i < fsasNum; i++) { | |
68 | + deserializeOptions(currPtr); | |
69 | + deserializeFSA(currPtr); | |
70 | + } | |
71 | + return deserializeOptions(currPtr); | |
72 | +} | |
73 | + | |
74 | +SegrulesFSA* getDefaultSegrulesFSA( | |
75 | + const map<SegrulesOptions, SegrulesFSA*>& map, | |
76 | + const unsigned char* ptr) { | |
77 | + SegrulesOptions opts = getDefaultSegrulesOptions(ptr); | |
78 | + return (*(map.find(opts))).second; | |
79 | +} | |
80 | + | |
81 | +void debugMap(const map<SegrulesOptions, SegrulesFSA*>& res) { | |
82 | + map<SegrulesOptions, SegrulesFSA*>::const_iterator it = res.begin(); | |
83 | + while (it != res.end()) { | |
84 | + SegrulesOptions::const_iterator it1 = it->first.begin(); | |
85 | + while (it1 != it->first.end()) { | |
86 | + cerr << it1->first << " --> " << it1->second << endl; | |
87 | + it1++; | |
88 | + } | |
89 | + cerr << it->second << endl; | |
90 | + it++; | |
91 | + } | |
92 | +} | |
... | ... |
morfeusz/segrules/segrules.hpp
... | ... | @@ -18,6 +18,9 @@ typedef std::map<std::string, std::string> SegrulesOptions; |
18 | 18 | //typedef FSA<unsigned char> SegrulesFSAType; |
19 | 19 | |
20 | 20 | std::map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* analyzerPtr); |
21 | +SegrulesOptions getDefaultSegrulesOptions(const unsigned char* ptr); | |
22 | +SegrulesFSA* getDefaultSegrulesFSA(const map<SegrulesOptions, SegrulesFSA*>& map, const unsigned char* analyzerPtr); | |
23 | +void debugMap(const map<SegrulesOptions, SegrulesFSA*>& res); | |
21 | 24 | |
22 | 25 | #endif /* SEGRULES_HPP */ |
23 | 26 | |
... | ... |
nbproject/configurations.xml
... | ... | @@ -279,7 +279,7 @@ |
279 | 279 | <ccTool> |
280 | 280 | <incDir> |
281 | 281 | <pElem>morfeusz</pElem> |
282 | - <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | |
282 | + <pElem>/usr/lib/jvm/default-java/include</pElem> | |
283 | 283 | </incDir> |
284 | 284 | <preprocessorList> |
285 | 285 | <Elem>libjmorfeusz_EXPORTS</Elem> |
... | ... |