Commit 9cacc587215ba16ccaea4e7ff865ad40a36596ac

Authored by Michał Lenart
1 parent f1e52ff4

- dodanie opcji aggl i praet

- dodanie obsługi CLI w morfeusz_analyzer
- ogólne ogarnięcie generatora


git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@115 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
... ... @@ -5,7 +5,7 @@ project (Morfeusz)
5 5 set (Morfeusz_VERSION_MAJOR 2)
6 6 set (Morfeusz_VERSION_MINOR 0)
7 7 set (Morfeusz_VERSION_PATCH 0)
8   -set (CMAKE_BUILD_TYPE "Release")
  8 +set (CMAKE_BUILD_TYPE "Debug")
9 9  
10 10 enable_testing()
11 11  
... ... @@ -36,7 +36,7 @@ if ("${INPUT_DICTIONARIES}" STREQUAL "")
36 36 if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE")
37 37 set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt)
38 38 else ()
39   - set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorf-0.6.7.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab")
  39 + set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorfSmall.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab")
40 40 endif ()
41 41 endif ()
42 42  
... ... @@ -109,7 +109,7 @@ set (CPACK_OUTPUT_FILE_PREFIX "${TARGET_DIR}")
109 109 if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
110 110 set (CPACK_GENERATOR "DEB" "TGZ")
111 111 #debian
112   - set (CPACK_DEBIAN_PACKAGE_NAME "morfeusz")
  112 + set (CPACK_DEBIAN_PACKAGE_NAME "morfeusz2")
113 113 set (CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_CONTACT}")
114 114 set (CPACK_DEBIAN_PACKAGE_DEPENDS "libstdc++6 (>= 4.6)")
115 115 set (CPACK_DEBIAN_PACKAGE_ARCHITECTURE "${ARCHITECTURE}")
... ...
fsabuilder/buildfsa.py
... ... @@ -261,9 +261,11 @@ def main(opts):
261 261 if __name__ == '__main__':
262 262 import os
263 263 opts = _parseOptions()
264   -# try:
265   - main(opts)
  264 + try:
  265 + main(opts)
266 266 # except Exception as ex:
267   -# raise ex
268 267 # print >> sys.stderr, unicode(ex).encode('utf8')
  268 +# sys.exit(1)
  269 + finally:
  270 + pass
269 271  
... ...
fsabuilder/morfeuszbuilder/fsa/encode.py
... ... @@ -26,7 +26,6 @@ class Encoder(object):
26 26  
27 27 def encodeData(self, data):
28 28 raise NotImplementedError()
29   -# return bytearray(u'|'.join(data).encode(self.encoding)) + bytearray([0])
30 29  
31 30 def decodeData(self, rawData):
32 31 return NotImplementedError()
... ... @@ -134,16 +133,8 @@ class Encoder(object):
134 133  
135 134 for typenum, interpsList in segnum2Interps.iteritems():
136 135 res.extend(self._encodeInterps4Type(typenum, interpsList, withCasePattern, withPrefix))
137   -
138   -
139   -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()):
140   -# encodedInterpsList.extend(self._encodeTypeNum(interp.typenum))
141   -# encodedInterpsList.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=withCasePattern, withPrefix=withPrefix))
142   -# encodedInterpsList.extend(self._encodeTagNum(interp.tagnum))
143   -# encodedInterpsList.extend(self._encodeNameNum(interp.namenum))
144 136 del interpsList
145   -# res.extend(serializationUtils.htons(len(encodedInterpsList)))
146   -# res.extend(encodedInterpsList)
  137 +
147 138 return res
148 139  
149 140 class MorphEncoder(Encoder):
... ... @@ -156,19 +147,6 @@ class MorphEncoder(Encoder):
156 147  
157 148 def encodeData(self, interpsList):
158 149 return self._doEncodeData(interpsList, withCasePattern=True, withPrefix=False)
159   -# res = bytearray()
160   -# firstByte = len(interpsList)
161   -# assert firstByte < 256
162   -# assert firstByte > 0
163   -# res.append(firstByte)
164   -# assert type(interpsList) == frozenset
165   -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()):
166   -# res.extend(self._encodeTypeNum(interp.typenum))
167   -# res.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=True, withPrefix=False))
168   -# res.extend(self._encodeTagNum(interp.tagnum))
169   -# res.extend(self._encodeNameNum(interp.namenum))
170   -# del interpsList
171   -# return res
172 150  
173 151 class Encoder4Generator(Encoder):
174 152  
... ... @@ -177,18 +155,3 @@ class Encoder4Generator(Encoder):
177 155  
178 156 def encodeData(self, interpsList):
179 157 return self._doEncodeData(interpsList, withCasePattern=False, withPrefix=True)
180   -# res = bytearray()
181   -# firstByte = len(interpsList)
182   -# assert firstByte < 256
183   -# assert firstByte > 0
184   -# res.append(firstByte)
185   -# assert type(interpsList) == frozenset
186   -# for interp in sorted(interpsList, key=lambda i: i.getSortKey()):
187   -# res.extend(self._encodeTypeNum(interp.typenum))
188   -# res.extend(self._encodeEncodedForm(interp.encodedForm, withCasePattern=False, withPrefix=True))
189   -# res.extend(self._encodeTagNum(interp.tagnum))
190   -# res.extend(self._encodeNameNum(interp.namenum))
191   -# return res
192   -#
193   -# def decodeData(self, data):
194   -#
... ...
fsabuilder/morfeuszbuilder/segrules/rulesManager.py
... ... @@ -11,6 +11,7 @@ class RulesManager(object):
11 11 def __init__(self, segtypes):
12 12 self.options2DFA = {}
13 13 self.segtypes = segtypes
  14 + self.defaultOptions = None
14 15  
15 16 def _options2Key(self, optionsMap):
16 17 return frozenset(optionsMap.items())
... ... @@ -21,6 +22,9 @@ class RulesManager(object):
21 22 def getDFA(self, optionsMap):
22 23 return self.options2DFA[self._options2Key(optionsMap)]
23 24  
  25 + def setDefaultOptions(self, key2Def):
  26 + self.defaultOptions = key2Def
  27 +
24 28 def addDFA(self, optionsMap, dfa):
25 29 self.options2DFA[self._options2Key(optionsMap)] = dfa
26 30  
... ... @@ -40,13 +44,17 @@ class RulesManager(object):
40 44 optionsMap = self._key2Options(key)
41 45 res.extend(self._serializeOptionsMap(optionsMap))
42 46 res.extend(self._serializeDFA(dfa))
  47 + res.extend(self._serializeOptionsMap(self.defaultOptions))
43 48 logging.info('segmentation rules size: %s bytes', len(res))
44 49 return res
45 50  
46 51 def _serializeOptionsMap(self, optionsMap):
47 52 assert len(optionsMap) < 256
48 53 res = bytearray()
  54 + res.append(2)
  55 + res.extend(self._serializeString('aggl'))
49 56 res.extend(self._serializeString(optionsMap['aggl']))
  57 + res.extend(self._serializeString('praet'))
50 58 res.extend(self._serializeString(optionsMap['praet']))
51 59 return res
52 60  
... ... @@ -63,4 +71,4 @@ class RulesManager(object):
63 71 # res.append(len(string))
64 72 res.extend(string.encode('utf8'))
65 73 res.append(0)
66   - return res
67 74 \ No newline at end of file
  75 + return res
... ...
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
... ... @@ -46,14 +46,14 @@ class RulesParser(object):
46 46 def2Key[define] = key
47 47  
48 48 firstNFA = None
49   - for defs in itertools.product(*key2Defs.values()):
  49 + for idx, defs in enumerate(itertools.product(*key2Defs.values())):
50 50 key2Def = dict([(def2Key[define], define) for define in defs])
51 51 # print key2Def
52 52 nfa = rulesNFA.RulesNFA()
53 53 if not firstNFA:
54 54 firstNFA = nfa
55 55 section = 'combinations' if self.rulesType == RulesParser.PARSE4ANALYZER else 'generator combinations'
56   - combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(section)
  56 + combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(section, ignoreComments=False)
57 57 combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs, filename))
58 58 for rule in self._doParse(combinationEnumeratedLines, segtypesHelper, filename):
59 59 # print rule
... ... @@ -64,6 +64,8 @@ class RulesParser(object):
64 64 # dfa.debug()
65 65 # print dfa.tryToRecognize(bytearray([14]))
66 66 res.addDFA(key2Def, dfa)
  67 + if idx == 0:
  68 + res.setDefaultOptions(key2Def)
67 69 return res
68 70  
69 71 def _doParse(self, combinationEnumeratedLines, segtypesHelper, filename):
... ...
fsabuilder/morfeuszbuilder/utils/configFile.py
... ... @@ -46,8 +46,13 @@ class ConfigFile(object):
46 46 else:
47 47 return None
48 48  
49   - def enumerateLinesInSection(self, sectionName):
50   - return self.section2Lines[sectionName]
  49 + def enumerateLinesInSection(self, sectionName, ignoreComments=True):
  50 + if sectionName not in self.section2Lines:
  51 + raise exceptions.ConfigFileException(self.filename, None, u'Missing section: "%s"' % sectionName)
  52 + if not ignoreComments:
  53 + return self.section2Lines[sectionName]
  54 + else:
  55 + return [(linenum, line) for (linenum, line) in self.section2Lines[sectionName] if not line.startswith('#')]
51 56  
52 57 def _parse(self):
53 58 with codecs.open(self.filename, 'r', 'utf8') as f:
... ...
fsabuilder/morfeuszbuilder/utils/exceptions.py
... ... @@ -31,4 +31,7 @@ class ConfigFileException(FSABuilderException):
31 31 self.msg = msg
32 32  
33 33 def __str__(self):
34   - return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg)
  34 + if self.lineNum:
  35 + return u'%s:%d - %s' % (self.filename, self.lineNum, self.msg)
  36 + else:
  37 + return u'%s - %s' % (self.filename, self.msg)
... ...
input/segmenty.dat
... ... @@ -3,6 +3,7 @@ aggl=permissive strict isolated
3 3 praet=split composite
4 4  
5 5 [combinations]
  6 +
6 7 #define wsz_interp (interp|kropka|dywiz)*
7 8  
8 9 #define moze_interp(segmenty) wsz_interp segmenty wsz_interp
... ... @@ -19,7 +20,7 @@ samotny
19 20 # przeszlik pojedynczy w formie nieaglutynacyjnej, np. „gniótł”:
20 21 moze_interp(praet_sg_na)
21 22  
22   -# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „moze”:
  23 +# przeszlik pojedynczy w formie niezróżnicowanej aglutynacyjnie, np. „czytał”:
23 24 moze_interp(praet_sg)
24 25  
25 26 # przeszlik mnogi, np. „czytali”:
... ... @@ -69,7 +70,7 @@ moze_interp(praet_sg by aglsg)
69 70 # np. „gnietli·by·śmy”
70 71 moze_interp(praet_pl by aglpl)
71 72 #else
72   -# moze_interp(praetcond)
  73 +#moze_interp(praetcond)
73 74 #endif
74 75 # np. „by·ś”
75 76 moze_interp(by aglsg)
... ... @@ -97,13 +98,13 @@ moze_interp( (adja dywiz)+ adj )
97 98 # adja dywiz adja dywiz adja dywiz adj interp?
98 99 # adja dywiz adja dywiz adja dywiz adja dywiz adj interp?
99 100  
100   -# Formy zanegowane stopnia wyższego przymiotników i przysłówków (WK)
101   -# np. „nie·grzeczniejszy”, „nie·grzeczniej”
102   -moze_interp( nie> adj_com )
  101 +# Stopień najwyższy:
  102 +# np. „naj·zieleńszy”, „naj·mądrzej”
  103 +moze_interp( naj> adj_sup )
103 104  
104 105 # Formy „zanegowane” gerundiów i imiesłowów:
105 106 # np. „nie·czytanie”, „nie·przeczytany”, „nie·czytający”:
106   -moze_interp( nie> negat )
  107 +moze_interp( nie > negat )
107 108  
108 109 # Przyimki akceptujące krótką formę „-ń”
109 110 moze_interp(z_on_agl)
... ... @@ -115,17 +116,11 @@ moze_interp( dig>* dig )
115 116  
116 117 # Formacje prefiksalne
117 118 #### trzeba wydzielić odpowiednie samodze!
118   -# rzeczownikowe
119   -# np. „euro·sodoma”, „e-·papieros”
120   -moze_interp(nomina)
121   -moze_interp( prefs> nomina )
  119 +# rzeczownikowe i przymiotnikowe
  120 +# np. „euro·sodoma”, „e-·papieros”, „euro·sodomski”, „bez·argumentowy”
  121 +moze_interp( prefs> samodz )
122 122 # czasownikowe np. „po·nakapywać”
123   -moze_interp(verba_imperf)
124   -moze_interp( prefv> verba_imperf )
125   -# przymiotnikowe np. „do·żylny”, „euro·sodomski”, „bez·argumentowy”
126   -moze_interp(adjectiva)
127   -moze_interp(prefa> adj)
128   -moze_interp( prefa> adjectiva )
  123 +moze_interp( prefv> samodz )
129 124  
130 125 # Apozycje z dywizem
131 126 # np. „kobieta-prezydent”
... ... @@ -138,26 +133,12 @@ adj dywiz samodz
138 133 # ?
139 134 samodz dywiz adj
140 135  
141   -#### PONIŻEJ REGUŁY WK
142   -# Stopień najwyższy:
143   -# np. „naj·zieleńszy”, „naj·mądrzej”
144   -moze_interp( naj> adj_sup )
145   -moze_interp( nie> naj> adj_sup )
146   -# Cząstka li przy osobowych formach czasownika oddzielona dywizem: znasz-li ten kraj
147   -moze_interp( praet_sg dywiz li)
148   -moze_interp( praet_pl dywiz li)
149   -moze_interp( praet_sg_na dywiz li)
150   -moze_interp( fin dywiz li)
151   -
152   -# i bez dywizu --- czy bez dywizu jest sens to łapać?
153   -#moze_interp( praet_sg li)
154   -#moze_interp( praet_pl li)
155   -#moze_interp( praet_sg_na li)
156   -#moze_interp( fin li)
157   -
158 136 [generator combinations]
159   -prefs> nomina
160   -nomina
  137 +adj
  138 +adj_sup
  139 +samodz
  140 +prefs> samodz
  141 +prefv> samodz
161 142  
162 143 [segment types]
163 144 naj
... ... @@ -193,45 +174,16 @@ dywiz
193 174 kropka
194 175 samodz
195 176  
196   -[lexemes]
197   -z_aglt aby:comp
198   -z_aglt bowiem:comp
199   -by by:qub
200   -li li:qub
201   -z_aglt by:comp
202   -z_aglt cóż:subst
203   -z_aglt czemu:adv
204   -z_aglt czyżby:qub
205   -z_aglt choćby:comp
206   -z_aglt chociażby:comp
207   -z_aglt dlaczego:adv
208   -z_aglt dopóki:comp
209   -z_aglt dopóty:conj
210   -z_aglt gdyby:comp
211   -z_aglt gdzie:qub
212   -z_aglt gdzie:adv
213   -z_aglt jakby:comp
214   -z_aglt jakoby:comp
215   -z_aglt kiedy:adv
216   -z_aglt kiedy:comp
217   -z_aglt tylko:qub
218   -z_aglt żeby:comp
219   -dywiz -:interp
220   -kropka .:interp
221   -
222 177 [tags]
223 178 naj naj
224 179 nie nie
225 180 prefs prefs
226 181 prefv prefv
227   -prefa prefa
228 182 dig dig
229 183 adja adja
230 184 adj adj:%:pos
231 185 adj_sup adj:%:sup
232 186 adj_sup adv:sup
233   -adj_com adj:%:com
234   -adj_com adj:%:com
235 187 negat ger:%:neg
236 188 negat pact:%:neg
237 189 negat ppas:%:neg
... ... @@ -243,22 +195,39 @@ samotny interj
243 195 interp interp
244 196 aglsg aglt:sg:%
245 197 aglpl aglt:pl:%
  198 +#praetcond cond:%
  199 +#praetcond praet:%:pri:%
  200 +#praetcond praet:%:sec:%
  201 +#praetcond praet:%:ter:%
246 202 praet_sg_agl praet:sg:%:agl
247 203 praet_sg_na praet:sg:%:nagl
248 204 praet_sg praet:sg:%
249 205 praet_pl praet:pl:%
250 206 praet_sg winien:sg:%
251 207 praet_pl winien:pl:%
252   -fin fin:%
253   -nomina subst:%
254   -nomina ger:%
255   -nomina depr:%
256   -adjectiva adv:%
257   -adjectiva ppas:%
258   -adjectiva pact:%
259   -verba_imperf praet:%:imperf
260   -verba_imperf fin:%:imperf
261   -verba_imperf inf:imperf
262   -verba_imperf imps:imperf
263   -verba_imperf impt:%:imperf
264 208 samodz %
  209 +
  210 +[lexemes]
  211 +z_aglt aby:comp
  212 +z_aglt bowiem:comp
  213 +by by:qub
  214 +z_aglt by:comp
  215 +z_aglt cóż:subst
  216 +z_aglt czemu:adv
  217 +z_aglt czyżby:qub
  218 +z_aglt choćby:comp
  219 +z_aglt chociażby:comp
  220 +z_aglt dlaczego:adv
  221 +z_aglt dopóki:comp
  222 +z_aglt dopóty:conj
  223 +z_aglt gdyby:comp
  224 +z_aglt gdzie:qub
  225 +z_aglt gdzie:adv
  226 +z_aglt jakby:comp
  227 +z_aglt jakoby:comp
  228 +z_aglt kiedy:adv
  229 +z_aglt kiedy:comp
  230 +z_aglt tylko:qub
  231 +z_aglt żeby:comp
  232 +dywiz -:interp
  233 +kropka .:interp
... ...
morfeusz/CMakeLists.txt
1 1  
  2 +
2 3 ########## generate default dictionary data #################
3 4 add_custom_command (
4 5 OUTPUT "${INPUT_DICTIONARY_CPP}"
... ... @@ -57,7 +58,7 @@ set(INCLUDE_FILES
57 58  
58 59 add_library (libmorfeusz SHARED ${SRC_FILES})
59 60 set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE)
60   -set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz")
  61 +set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2")
61 62  
62 63 add_executable (morfeusz_analyzer morfeusz_analyzer.cpp)
63 64 add_executable (morfeusz_generator morfeusz_generator.cpp)
... ...
morfeusz/Environment.cpp
... ... @@ -21,13 +21,6 @@ static Deserializer<vector<InterpsGroup> >& initializeDeserializer(MorfeuszProce
21 21 return *(processorType == ANALYZER ? analyzerDeserializer : generatorDeserializer);
22 22 }
23 23  
24   -static SegrulesFSA* getDefaultSegrulesFSA(const map<SegrulesOptions, SegrulesFSA*>& map) {
25   - SegrulesOptions opts;
26   - opts["aggl"] = "isolated";
27   - opts["praet"] = "split";
28   - return (*(map.find(opts))).second;
29   -}
30   -
31 24 static void deleteSegrulesFSAs(std::map<SegrulesOptions, SegrulesFSA*>& fsasMap) {
32 25 for (
33 26 std::map<SegrulesOptions, SegrulesFSA*>::iterator it = fsasMap.begin();
... ... @@ -43,23 +36,23 @@ Environment::Environment(
43 36 MorfeuszProcessorType processorType,
44 37 const unsigned char* fsaFileStartPtr)
45 38 : currentCharsetConverter(getCharsetConverter(charset)),
46   - utf8CharsetConverter(),
47   - isoCharsetConverter(),
48   - cp1250CharsetConverter(),
49   - cp852CharsetConverter(),
50   - caseConverter(),
51   - tagset(fsaFileStartPtr),
52   - fsaFileStartPtr(fsaFileStartPtr),
53   - fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))),
54   - segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)),
55   - currSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap)),
56   - isFromFile(false),
57   - chunksDecoder(
58   - processorType == ANALYZER
59   - ? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this)
60   - : (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)),
61   - processorType(processorType)
62   - {
  39 +utf8CharsetConverter(),
  40 +isoCharsetConverter(),
  41 +cp1250CharsetConverter(),
  42 +cp852CharsetConverter(),
  43 +caseConverter(),
  44 +tagset(fsaFileStartPtr),
  45 +fsaFileStartPtr(fsaFileStartPtr),
  46 +fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))),
  47 +segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)),
  48 +currSegrulesOptions(getDefaultSegrulesOptions(fsaFileStartPtr)),
  49 +currSegrulesFSA(getDefaultSegrulesFSA(segrulesFSAsMap, fsaFileStartPtr)),
  50 +isFromFile(false),
  51 +chunksDecoder(
  52 +processorType == ANALYZER
  53 +? (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Analyzer(*this)
  54 +: (InterpretedChunksDecoder*) new InterpretedChunksDecoder4Generator(*this)),
  55 +processorType(processorType) {
63 56 }
64 57  
65 58 const CharsetConverter* Environment::getCharsetConverter(MorfeuszCharset charset) const {
... ... @@ -129,3 +122,16 @@ const FSAType& Environment::getFSA() const {
129 122 const InterpretedChunksDecoder& Environment::getInterpretedChunksDecoder() const {
130 123 return *(this->chunksDecoder);
131 124 }
  125 +
  126 +void Environment::setSegrulesOption(const std::string& option, const std::string& value) {
  127 + if (this->currSegrulesOptions.find(option) == this->currSegrulesOptions.end()) {
  128 + throw MorfeuszException("Invalid segmentation option '"+option+"'");
  129 + }
  130 + SegrulesOptions prevOptions = this->currSegrulesOptions;
  131 + this->currSegrulesOptions[option] = value;
  132 + if (this->segrulesFSAsMap.find(this->currSegrulesOptions) == this->segrulesFSAsMap.end()) {
  133 + this->currSegrulesOptions = prevOptions;
  134 + throw MorfeuszException("Invalid '"+option+"' option value: '"+value+"'");
  135 + }
  136 + this->currSegrulesFSA = this->segrulesFSAsMap.find(this->currSegrulesOptions)->second;
  137 +}
... ...
morfeusz/Environment.hpp
... ... @@ -41,6 +41,8 @@ public:
41 41  
42 42 void setFSAFile(const std::string& filename);
43 43  
  44 + void setSegrulesOption(const std::string& option, const std::string& value);
  45 +
44 46 const SegrulesFSA& getCurrentSegrulesFSA() const;
45 47  
46 48 const FSAType& getFSA() const;
... ... @@ -60,6 +62,7 @@ private:
60 62 const unsigned char* fsaFileStartPtr;
61 63 const FSAType* fsa;
62 64 std::map<SegrulesOptions, SegrulesFSA*> segrulesFSAsMap;
  65 + SegrulesOptions currSegrulesOptions;
63 66 const SegrulesFSA* currSegrulesFSA;
64 67 bool isFromFile;
65 68  
... ...
morfeusz/InterpretedChunksDecoder.hpp
... ... @@ -29,21 +29,40 @@ public:
29 29 : env(env) {
30 30 }
31 31  
  32 + virtual ~InterpretedChunksDecoder() {
  33 + }
  34 +
32 35 virtual void decode(
33 36 unsigned int startNode,
34 37 unsigned int endNode,
35 38 const InterpretedChunk& interpretedChunk,
36 39 std::vector<MorphInterpretation>& out) const = 0;
37 40  
38   - virtual ~InterpretedChunksDecoder() {
39   - }
40   -
41 41 protected:
  42 +
  43 + virtual MorphInterpretation decodeMorphInterpretation(
  44 + unsigned int startNode, unsigned int endNode,
  45 + const string& orth,
  46 + const string& lemmaPrefix,
  47 + const InterpretedChunk& chunk,
  48 + const unsigned char*& ptr) const = 0;
42 49  
43 50 virtual void decodeForm(
44 51 const std::vector<uint32_t>& orth,
45 52 const EncodedForm& form,
46 53 std::string& res) const = 0;
  54 +
  55 + EncodedInterpretation deserializeInterp(const unsigned char*& ptr) const {
  56 + EncodedInterpretation interp;
  57 + deserializeEncodedForm(ptr, interp.value);
  58 + interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr)));
  59 + ptr += 2;
  60 + interp.nameClassifier = *ptr;
  61 + ptr++;
  62 + return interp;
  63 + }
  64 +
  65 + virtual void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const = 0;
47 66  
48 67 const Environment& env;
49 68 };
... ... @@ -53,7 +72,7 @@ public:
53 72  
54 73 InterpretedChunksDecoder4Analyzer(const Environment& env) : InterpretedChunksDecoder(env) {
55 74 }
56   -
  75 +
57 76 void decode(
58 77 unsigned int startNode,
59 78 unsigned int endNode,
... ... @@ -90,42 +109,12 @@ protected:
90 109 }
91 110 }
92 111  
93   -private:
94   -
95   - void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& originalForm, std::string& decodedForm) const {
96   - for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
97   - const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
98   - originalForm += env.getCharsetConverter().toString(prefixChunk.originalCodepoints);
99   - const unsigned char* ptr = prefixChunk.interpsGroup.ptr;
100   - MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, originalForm, string(""), prefixChunk, ptr);
101   - decodedForm += mi.getLemma();
102   - }
103   - }
104   -
105   - MorphInterpretation decodeMorphInterpretation(
106   - unsigned int startNode, unsigned int endNode,
107   - const string& orth,
108   - const string& lemmaPrefix,
109   - const InterpretedChunk& chunk,
110   - const unsigned char*& ptr) const {
111   - string lemma = lemmaPrefix;
112   - EncodedInterpretation ei = this->decodeInterp(ptr);
113   - this->decodeForm(chunk.lowercaseCodepoints, ei.value, lemma);
114   - return MorphInterpretation(
115   - startNode, endNode,
116   - orth, lemma,
117   - ei.tag,
118   - ei.nameClassifier,
119   - env.getTagset(),
120   - env.getCharsetConverter());
121   - }
122   -
123   - void decodeLemma(const unsigned char*& ptr, EncodedForm& lemma) const {
124   - lemma.suffixToCut = *ptr;
  112 + void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const {
  113 + encodedForm.suffixToCut = *ptr;
125 114 ptr++;
126   - lemma.suffixToAdd = (const char*) ptr;
  115 + encodedForm.suffixToAdd = (const char*) ptr;
127 116 ptr += strlen((const char*) ptr) + 1;
128   - assert(lemma.casePattern.size() == 0);
  117 + assert(encodedForm.casePattern.size() == 0);
129 118 // lemma.casePattern.resize(MAX_WORD_SIZE, false);
130 119 uint8_t casePatternType = *ptr;
131 120 ptr++;
... ... @@ -139,7 +128,7 @@ private:
139 128 ptr++;
140 129 for (unsigned int i = 0; i < prefixLength; i++) {
141 130 // lemma.casePattern[i] = true;
142   - lemma.casePattern.push_back(true);
  131 + encodedForm.casePattern.push_back(true);
143 132 }
144 133 // lemma.casePattern.resize(prefixLength, true);
145 134 break;
... ... @@ -150,21 +139,40 @@ private:
150 139 uint8_t idx = *ptr;
151 140 ptr++;
152 141 // lemma.casePattern[idx] = true;
153   - lemma.casePattern.resize(idx + 1, false);
154   - lemma.casePattern[idx] = true;
  142 + encodedForm.casePattern.resize(idx + 1, false);
  143 + encodedForm.casePattern[idx] = true;
155 144 }
156 145 break;
157 146 }
158 147 }
159   -
160   - EncodedInterpretation decodeInterp(const unsigned char*& ptr) const {
161   - EncodedInterpretation interp;
162   - decodeLemma(ptr, interp.value);
163   - interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr)));
164   - ptr += 2;
165   - interp.nameClassifier = *ptr;
166   - ptr++;
167   - return interp;
  148 +private:
  149 +
  150 + MorphInterpretation decodeMorphInterpretation(
  151 + unsigned int startNode, unsigned int endNode,
  152 + const string& orth,
  153 + const string& lemmaPrefix,
  154 + const InterpretedChunk& chunk,
  155 + const unsigned char*& ptr) const {
  156 + string lemma = lemmaPrefix;
  157 + EncodedInterpretation ei = this->deserializeInterp(ptr);
  158 + this->decodeForm(chunk.lowercaseCodepoints, ei.value, lemma);
  159 + return MorphInterpretation(
  160 + startNode, endNode,
  161 + orth, lemma,
  162 + ei.tag,
  163 + ei.nameClassifier,
  164 + env.getTagset(),
  165 + env.getCharsetConverter());
  166 + }
  167 +
  168 + void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orth, std::string& lemmaPrefix) const {
  169 + for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
  170 + const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
  171 + orth += env.getCharsetConverter().toString(prefixChunk.originalCodepoints);
  172 + const unsigned char* ptr = prefixChunk.interpsGroup.ptr;
  173 + MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orth, string(""), prefixChunk, ptr);
  174 + lemmaPrefix += mi.getLemma();
  175 + }
168 176 }
169 177 };
170 178  
... ... @@ -173,35 +181,51 @@ public:
173 181  
174 182 InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) {
175 183 }
176   -
  184 +
177 185 void decode(
178 186 unsigned int startNode,
179 187 unsigned int endNode,
180 188 const InterpretedChunk& interpretedChunk,
181 189 std::vector<MorphInterpretation>& out) const {
182   - // string orth;
183   - // string lemma;
184   - // convertPrefixes(interpretedChunk, lemma, orth);
185   - // size_t orthLength = orth.length();
186   - // lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
187   - // for (unsigned int i = 0; i < interpretedChunk.interpsGroup.interps.size(); i++) {
188   - // const EncodedInterpretation& ei = interpretedChunk.interpsGroup.interps[i];
189   - // decodeForm(
190   - // interpretedChunk.originalCodepoints,
191   - // ei.value,
192   - // orth);
193   - // out.push_back(MorphInterpretation(
194   - // startNode, endNode,
195   - // orth, lemma,
196   - // ei.tag,
197   - // ei.nameClassifier,
198   - // env.getTagset(),
199   - // env.getCharsetConverter()));
200   - // orth.erase(orthLength);
201   - // }
  190 + string orthPrefix;
  191 + string lemma;
  192 + convertPrefixes(interpretedChunk, orthPrefix, lemma);
  193 + lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
  194 + const unsigned char* currPtr = interpretedChunk.interpsGroup.ptr;
  195 + while (currPtr - interpretedChunk.interpsGroup.ptr < interpretedChunk.interpsGroup.size) {
  196 + out.push_back(this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr));
  197 + }
202 198 }
203 199  
204 200 private:
  201 +
  202 + void convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const {
  203 + for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
  204 + const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
  205 + lemma += env.getCharsetConverter().toString(prefixChunk.originalCodepoints);
  206 + const unsigned char* ptr = prefixChunk.interpsGroup.ptr;
  207 + MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr);
  208 + orthPrefix += mi.getOrth();
  209 + }
  210 + }
  211 +
  212 + MorphInterpretation decodeMorphInterpretation(
  213 + unsigned int startNode, unsigned int endNode,
  214 + const string& orthPrefix,
  215 + const string& lemma,
  216 + const InterpretedChunk& chunk,
  217 + const unsigned char*& ptr) const {
  218 + string orth = orthPrefix;
  219 + EncodedInterpretation ei = this->deserializeInterp(ptr);
  220 + this->decodeForm(chunk.originalCodepoints, ei.value, orth);
  221 + return MorphInterpretation(
  222 + startNode, endNode,
  223 + orth, lemma,
  224 + ei.tag,
  225 + ei.nameClassifier,
  226 + env.getTagset(),
  227 + env.getCharsetConverter());
  228 + }
205 229  
206 230 void decodeForm(
207 231 const vector<uint32_t>& lemma,
... ... @@ -218,6 +242,15 @@ private:
218 242 env.getCharsetConverter().append(cp, res);
219 243 }
220 244 }
  245 +
  246 + void deserializeEncodedForm(const unsigned char*& ptr, EncodedForm& encodedForm) const {
  247 + encodedForm.prefixToAdd = (const char*) ptr;
  248 + ptr += strlen((const char*) ptr) + 1;
  249 + encodedForm.suffixToCut = *ptr;
  250 + ptr++;
  251 + encodedForm.suffixToAdd = (const char*) ptr;
  252 + ptr += strlen((const char*) ptr) + 1;
  253 + }
221 254 };
222 255  
223 256 #endif /* INTERPSGROUPDECODER_HPP */
... ...
morfeusz/Morfeusz.cpp
... ... @@ -40,32 +40,13 @@ options(createDefaultOptions()) {
40 40  
41 41 void Morfeusz::setAnalyzerFile(const string& filename) {
42 42 this->analyzerEnv.setFSAFile(filename);
43   - // if (this->isAnalyzerFSAFromFile) {
44   - // delete this->analyzerFSA;
45   - // deleteSegrulesFSAs(this->analyzerSegrulesFSAsMap);
46   - // delete this->analyzerPtr;
47   - // }
48   - // this->analyzerPtr = readFile<unsigned char>(filename.c_str());
49   - // this->analyzerFSA = FSA< vector<InterpsGroup> > ::getFSA(analyzerPtr, *initializeAnalyzerDeserializer());
50   - // this->analyzerSegrulesFSAsMap = createSegrulesFSAsMap(analyzerPtr);
51   - // this->isAnalyzerFSAFromFile = true;
52 43 }
53 44  
54 45 void Morfeusz::setGeneratorFile(const string& filename) {
55 46 this->generatorEnv.setFSAFile(filename);
56   - // if (this->isGeneratorFSAFromFile) {
57   - // delete this->generatorPtr;
58   - // }
59   - // this->generatorPtr = readFile<unsigned char>(filename.c_str());
60   - // this->generator.setGeneratorPtr(generatorPtr);
61 47 }
62 48  
63 49 Morfeusz::~Morfeusz() {
64   - // if (this->isAnalyzerFSAFromFile) {
65   - // delete this->analyzerFSA;
66   - // deleteSegrulesFSAs(this->analyzerSegrulesFSAsMap);
67   - // delete this->analyzerPtr;
68   - // }
69 50 }
70 51  
71 52 void Morfeusz::processOneWord(
... ... @@ -97,7 +78,6 @@ void Morfeusz::processOneWord(
97 78 }
98 79 srcNode++;
99 80 }
100   - // graph.getResults(*this->tagset, results);
101 81 }
102 82 else if (inputStart != inputEnd) {
103 83 this->appendIgnotiumToResults(env, string(inputStart, currInput), startNodeNum, results);
... ... @@ -140,6 +120,7 @@ void Morfeusz::doProcessOneWord(
140 120 vector<InterpsGroup> val(state.getValue());
141 121 for (unsigned int i = 0; i < val.size(); i++) {
142 122 InterpsGroup& ig = val[i];
  123 +// cerr << "accept at '" << currInput << "' type=" << (int) ig.type << endl;
143 124 set<SegrulesState> newSegrulesStates;
144 125 env.getCurrentSegrulesFSA().proceedToNext(ig.type, segrulesState, newSegrulesStates);
145 126 for (
... ... @@ -147,9 +128,6 @@ void Morfeusz::doProcessOneWord(
147 128 it != newSegrulesStates.end();
148 129 ++it) {
149 130 SegrulesState newSegrulesState = *it;
150   -// if (newSegrulesState.shiftOrthFromPrevious) {
151   -//
152   -// }
153 131 InterpretedChunk ic = {
154 132 inputData,
155 133 originalCodepoints,
... ... @@ -160,12 +138,19 @@ void Morfeusz::doProcessOneWord(
160 138 vector<InterpretedChunk>()
161 139 };
162 140 if (!accum.empty() && accum.back().shiftOrth) {
  141 +// cerr << "shift orth from " << (int) accum.back().interpsGroup.type << " to " << (int) ig.type << endl;
163 142 doShiftOrth(accum.back(), ic);
164 143 }
165 144 accum.push_back(ic);
166 145 if (isEndOfWord(codepoint)) {
167   - if (newSegrulesState.accepting)
  146 +// cerr << "end of word" << endl;
  147 + if (newSegrulesState.accepting) {
  148 +// cerr << "accept " << (int) ig.type << endl;
168 149 graph.addPath(accum);
  150 + }
  151 + else {
  152 +// cerr << "not accept " << (int) ig.type << endl;
  153 + }
169 154 }
170 155 else {
171 156 const char* newCurrInput = currInput;
... ... @@ -190,8 +175,6 @@ void Morfeusz::appendIgnotiumToResults(
190 175 }
191 176  
192 177 ResultsIterator Morfeusz::analyze(const string& text) const {
193   - // const char* textStart = text.c_str();
194   - // const char* textEnd = text.c_str() + text.length();
195 178 vector<MorphInterpretation> res;
196 179 this->analyze(text, res);
197 180 return ResultsIterator(res);
... ... @@ -207,29 +190,54 @@ void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results)
207 190 }
208 191  
209 192 ResultsIterator Morfeusz::generate(const string& text) const {
210   - // const char* textStart = text.c_str();
211   - // const char* textEnd = text.c_str() + text.length();
212 193 vector<MorphInterpretation> res;
213 194 this->generate(text, res);
214 195 return ResultsIterator(res);
215 196 }
216 197  
217   -void Morfeusz::generate(const string& text, vector<MorphInterpretation>& results) const {
218   - const char* input = text.c_str();
219   - const char* inputEnd = input + text.length();
  198 +ResultsIterator Morfeusz::generate(const string& text, int tagnum) const {
  199 + vector<MorphInterpretation> res;
  200 + this->generate(text, tagnum, res);
  201 + return ResultsIterator(res);
  202 +}
  203 +
  204 +void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& results) const {
  205 + const char* input = lemma.c_str();
  206 + const char* inputEnd = input + lemma.length();
220 207 while (input != inputEnd) {
221 208 int startNode = results.empty() ? 0 : results.back().getEndNode();
222 209 this->processOneWord(this->generatorEnv, input, inputEnd, startNode, results);
223 210 }
224 211 }
225 212  
  213 +// XXX - generates all interpretations of the lemma and only then filters them by tagnum; someday it should be improved
  214 +void Morfeusz::generate(const std::string& lemma, int tagnum, vector<MorphInterpretation>& result) const {
  215 + vector<MorphInterpretation> partRes;
  216 + this->generate(lemma, partRes);
  217 + for (unsigned int i = 0; i < partRes.size(); i++) {
  218 + if (partRes[i].getTagnum() == tagnum) {
  219 + result.push_back(partRes[i]);
  220 + }
  221 + }
  222 +}
  223 +
226 224 void Morfeusz::setCharset(MorfeuszCharset charset) {
227 225 this->options.encoding = charset;
228 226 this->analyzerEnv.setCharset(charset);
229 227 this->generatorEnv.setCharset(charset);
230 228 }
231 229  
232   -ResultsIterator::ResultsIterator(vector<MorphInterpretation>& res) {
  230 +void Morfeusz::setAggl(const std::string& aggl) {
  231 + this->analyzerEnv.setSegrulesOption("aggl", aggl);
  232 + this->generatorEnv.setSegrulesOption("aggl", aggl);
  233 +}
  234 +
  235 +void Morfeusz::setPraet(const std::string& praet) {
  236 + this->analyzerEnv.setSegrulesOption("praet", praet);
  237 + this->generatorEnv.setSegrulesOption("praet", praet);
  238 +}
  239 +
  240 +ResultsIterator::ResultsIterator(const vector<MorphInterpretation>& res) {
233 241 resultsBuffer.insert(resultsBuffer.begin(), res.begin(), res.end());
234 242 }
235 243  
... ...
morfeusz/Morfeusz.hpp
... ... @@ -35,6 +35,12 @@ class ResultsIterator;
35 35  
36 36 typedef State< std::vector<InterpsGroup > > StateType;
37 37  
  38 +/**
  39 + * Performs morphological analysis (analyze methods) and synthesis (generate methods).
  40 + *
  41 + * It is NOT thread-safe,
  42 + * but a separate Morfeusz instance can be used for each concurrent thread.
  43 + */
38 44 class Morfeusz {
39 45 public:
40 46  
... ... @@ -57,6 +63,9 @@ public:
57 63 */
58 64 void setGeneratorFile(const std::string& filename);
59 65  
  66 + /**
  67 + * Destroys Morfeusz object.
  68 + */
60 69 virtual ~Morfeusz();
61 70  
62 71 /**
... ... @@ -82,6 +91,16 @@ public:
82 91 * @return - iterator over morphological analysis results
83 92 */
84 93 ResultsIterator generate(const std::string& lemma) const;
  94 +
  95 + /**
  96 + * Perform morphological synthesis on a given lemma and return the results as iterator.
  97 + * Limit results to interpretations with the specified tag.
  98 + *
  99 + * @param lemma - lemma to be synthesized
  100 + * @param tagnum - tag number that every returned interpretation must have
  101 + * @return - iterator over morphological synthesis results
  102 + */
  103 + ResultsIterator generate(const std::string& lemma, int tagnum) const;
85 104  
86 105 /**
87 106 * Perform morphological synthesis on a given lemma and put results in a vector.
... ... @@ -90,6 +109,16 @@ public:
90 109 * @param result - results vector
91 110 */
92 111 void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
  112 +
  113 + /**
  114 + * Perform morphological synthesis on a given lemma and put results in a vector.
  115 + * Limit results to interpretations with the specified tag.
  116 + *
  117 + * @param lemma - lemma to be synthesized
  118 + * @param tagnum - tag number that every returned interpretation must have
  119 + * @param result - results vector
  120 + */
  121 + void generate(const std::string& lemma, int tagnum, std::vector<MorphInterpretation>& result) const;
93 122  
94 123 /**
95 124 * Set encoding for input and output string objects.
... ... @@ -97,6 +126,20 @@ public:
97 126 * @param encoding
98 127 */
99 128 void setCharset(MorfeuszCharset encoding);
  129 +
  130 + /**
  131 + * Set aggl segmentation option value.
  132 + *
  133 + * @param aggl
  134 + */
  135 + void setAggl(const std::string& aggl);
  136 +
  137 + /**
  138 + * Set praet segmentation option value.
  139 + *
  140 + * @param praet
  141 + */
  142 + void setPraet(const std::string& praet);
100 143  
101 144 friend class ResultsIterator;
102 145 private:
... ... @@ -121,19 +164,9 @@ private:
121 164 const std::string& word,
122 165 int startNodeNum,
123 166 std::vector<MorphInterpretation>& results) const;
  167 +
124 168 Environment analyzerEnv;
125 169 Environment generatorEnv;
126   -// const unsigned char* analyzerPtr;
127   -// FSAType* analyzerFSA;
128   -// std::map<SegrulesOptions, SegrulesFSA*> analyzerSegrulesFSAsMap;
129   -// SegrulesFSA* currAnalyzerSegrulesFSA;
130   -// bool isAnalyzerFSAFromFile;
131   -//
132   -// const unsigned char* generatorPtr;
133   -// FSAType* generatorFSA;
134   -// bool isGeneratorFSAFromFile;
135   -// Generator generator;
136   -
137 170 MorfeuszOptions options;
138 171 };
139 172  
... ... @@ -143,7 +176,7 @@ public:
143 176 bool hasNext();
144 177 friend class Morfeusz;
145 178 private:
146   - ResultsIterator(vector<MorphInterpretation>& res);
  179 + ResultsIterator(const std::vector<MorphInterpretation>& res);
147 180 const char* rawInput;
148 181 std::list<MorphInterpretation> resultsBuffer;
149 182 int startNode;
... ...
morfeusz/cli/cli.hpp 0 → 100644
  1 +/*
  2 + * File: cli.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 17 marzec 2014, 18:32
  6 + */
  7 +
  8 +#ifndef CLI_HPP
  9 +#define CLI_HPP
  10 +
  11 +#ifdef _WIN64
  12 +#define TMPDUPA_IN IN
  13 +#define IN IN
  14 +#else
  15 +#ifdef _WIN32
  16 +#define TMPDUPA_IN IN
  17 +#define IN IN
  18 +#endif
  19 +#endif
  20 +
  21 +#include <iostream>
  22 +
  23 +#pragma GCC diagnostic push
  24 +#pragma GCC diagnostic ignored "-Wsign-compare"
  25 +#pragma GCC diagnostic ignored "-Wpedantic"
  26 +#pragma GCC diagnostic ignored "-Wunused-variable"
  27 +#pragma GCC diagnostic ignored "-Wconversion"
  28 +#pragma GCC diagnostic ignored "-Wreorder"
  29 +#pragma GCC diagnostic ignored "-Wlong-long"
  30 +#pragma GCC diagnostic ignored "-Wunused-function"
  31 +#pragma GCC diagnostic ignored "-Wcast-qual"
  32 +#pragma GCC diagnostic ignored "-Wparentheses"
  33 +#pragma GCC diagnostic ignored "-Wformat-extra-args"
  34 +
  35 +#include "ezOptionParser.hpp"
  36 +
  37 +#pragma GCC diagnostic pop
  38 +
  39 +void printCLIUsage(ez::ezOptionParser& opt, std::ostream& out) {
  40 + std::string usage;
  41 + opt.getUsage(usage);
  42 + out << usage;
  43 +}
  44 +
  45 +#ifdef _WIN64
  46 +#define IN TMPDUPA_IN
  47 +#else
  48 +#ifdef _WIN32
  49 +#define IN TMPDUPA_IN
  50 +#endif
  51 +#endif
  52 +
  53 +#endif /* CLI_HPP */
  54 +
... ...
morfeusz/cli/ezOptionParser.hpp 0 → 100644
  1 +/*
  2 +This file is part of ezOptionParser. See MIT-LICENSE.
  3 +
  4 +Copyright (C) 2011,2012 Remik Ziemlinski <first d0t surname att gmail>
  5 +
  6 +CHANGELOG
  7 +
  8 +v0.0.0 20110505 rsz Created.
  9 +v0.1.0 20111006 rsz Added validator.
  10 +v0.1.1 20111012 rsz Fixed validation of ulonglong.
  11 +v0.1.2 20111126 rsz Allow flag names start with alphanumeric (previously, flag had to start with alpha).
  12 +v0.1.3 20120108 rsz Created work-around for unique id generation with IDGenerator that avoids retarded c++ translation unit linker errors with single-header static variables. Forced inline on all methods to please retard compiler and avoid multiple def errors.
  13 +v0.1.4 20120629 Enforced MIT license on all files.
  14 +v0.2.0 20121120 Added parseIndex to OptionGroup.
  15 +v0.2.1 20130506 Allow disabling doublespace of OPTIONS usage descriptions.
  16 +*/
  17 +#ifndef EZ_OPTION_PARSER_H
  18 +#define EZ_OPTION_PARSER_H
  19 +
  20 +#include <stdlib.h>
  21 +#include <vector>
  22 +#include <list>
  23 +#include <map>
  24 +#include <string>
  25 +#include <iostream>
  26 +#include <fstream>
  27 +#include <algorithm>
  28 +#include <limits>
  29 +#include <sstream>
  30 +#include <cstring>
  31 +
  32 +namespace ez {
  33 +#define DEBUGLINE() printf("%s:%d\n", __FILE__, __LINE__);
  34 +
  35 +/* ################################################################### */
  36 +template<typename T>
  37 +static T fromString(const std::string* s) {
  38 + std::istringstream stream (s->c_str());
  39 + T t;
  40 + stream >> t;
  41 + return t;
  42 +};
  43 +template<typename T>
  44 +static T fromString(const char* s) {
  45 + std::istringstream stream (s);
  46 + T t;
  47 + stream >> t;
  48 + return t;
  49 +};
  50 +/* ################################################################### */
  51 +static bool isdigit(const std::string & s, int i=0) {
  52 + int n = s.length();
  53 + for(; i < n; ++i)
  54 + switch(s[i]) {
  55 + case '0': case '1': case '2':
  56 + case '3': case '4': case '5':
  57 + case '6': case '7': case '8': case '9': break;
  58 + default: return false;
  59 + }
  60 +
  61 + return true;
  62 +};
  63 +/* ################################################################### */
  64 +static bool isdigit(const std::string * s, int i=0) {
  65 + int n = s->length();
  66 + for(; i < n; ++i)
  67 + switch(s->at(i)) {
  68 + case '0': case '1': case '2':
  69 + case '3': case '4': case '5':
  70 + case '6': case '7': case '8': case '9': break;
  71 + default: return false;
  72 + }
  73 +
  74 + return true;
  75 +};
  76 +/* ################################################################### */
  77 +/*
  78 +Compare strings for opts, so short opt flags come before long format flags.
  79 +For example, -d < --dimension < --dmn, and lowercase also comes before uppercase. The default STL std::string compare doesn't do that.
  80 +*/
  81 +static bool CmpOptStringPtr(std::string * s1, std::string * s2) {
  82 + int c1,c2;
  83 + const char *s=s1->c_str();
  84 + for(c1=0; c1 < s1->size(); ++c1)
  85 + if (isalnum(s[c1])) // locale sensitive.
  86 + break;
  87 +
  88 + s=s2->c_str();
  89 + for(c2=0; c2 < s2->size(); ++c2)
  90 + if (isalnum(s[c2]))
  91 + break;
  92 +
  93 + // Test which has more symbols before its name.
  94 + if (c1 > c2)
  95 + return false;
  96 + else if (c1 < c2)
  97 + return true;
  98 +
  99 + // Both have same number of symbols, so compare first letter.
  100 + char char1 = s1->at(c1);
  101 + char char2 = s2->at(c2);
  102 + char lo1 = tolower(char1);
  103 + char lo2 = tolower(char2);
  104 +
  105 + if (lo1 != lo2)
  106 + return lo1 < lo2;
  107 +
  108 + // Their case doesn't match, so find which is lower.
  109 + char up1 = isupper(char1);
  110 + char up2 = isupper(char2);
  111 +
  112 + if (up1 && !up2)
  113 + return false;
  114 + else if (!up1 && up2)
  115 + return true;
  116 +
  117 + return (s1->compare(*s2)<0);
  118 +};
  119 +/* ################################################################### */
  120 +/*
  121 +Makes a vector of strings from one string,
  122 +splitting at (and excluding) delimiter "token".
  123 +*/
  124 +static void SplitDelim( const std::string& s, const char token, std::vector<std::string*> * result) {
  125 + std::string::const_iterator i = s.begin();
  126 + std::string::const_iterator j = s.begin();
  127 + const std::string::const_iterator e = s.end();
  128 +
  129 + while(i!=e) {
  130 + while(i!=e && *i++!=token);
  131 + std::string *newstr = new std::string(j, i);
  132 + if (newstr->at(newstr->size()-1) == token) newstr->erase(newstr->size()-1);
  133 + result->push_back(newstr);
  134 + j = i;
  135 + }
  136 +};
  137 +/* ################################################################### */
  138 +// Variant that uses deep copies and references instead of pointers (less efficient).
  139 +static void SplitDelim( const std::string& s, const char token, std::vector<std::string> & result) {
  140 + std::string::const_iterator i = s.begin();
  141 + std::string::const_iterator j = s.begin();
  142 + const std::string::const_iterator e = s.end();
  143 +
  144 + while(i!=e) {
  145 + while(i!=e && *i++!=token);
  146 + std::string newstr(j, i);
  147 + if (newstr.at(newstr.size()-1) == token) newstr.erase(newstr.size()-1);
  148 + result.push_back(newstr);
  149 + j = i;
  150 + }
  151 +};
  152 +/* ################################################################### */
  153 +// Variant that uses list instead of vector for efficient insertion, etc.
  154 +static void SplitDelim( const std::string& s, const char token, std::list<std::string*> & result) {
  155 + std::string::const_iterator i = s.begin();
  156 + std::string::const_iterator j = s.begin();
  157 + const std::string::const_iterator e = s.end();
  158 +
  159 + while(i!=e) {
  160 + while(i!=e && *i++!=token);
  161 + std::string *newstr = new std::string(j, i);
  162 + if (newstr->at(newstr->size()-1) == token) newstr->erase(newstr->size()-1);
  163 + result.push_back(newstr);
  164 + j = i;
  165 + }
  166 +};
  167 +/* ################################################################### */
  168 +static void ToU1(std::string ** strings, unsigned char * out, int n) {
  169 + for(int i=0; i < n; ++i) {
  170 + out[i] = (unsigned char)atoi(strings[i]->c_str());
  171 + }
  172 +};
  173 +/* ################################################################### */
  174 +static void ToS1(std::string ** strings, char * out, int n) {
  175 + for(int i=0; i < n; ++i) {
  176 + out[i] = (char)atoi(strings[i]->c_str());
  177 + }
  178 +};
  179 +/* ################################################################### */
  180 +static void ToU2(std::string ** strings, unsigned short * out, int n) {
  181 + for(int i=0; i < n; ++i) {
  182 + out[i] = (unsigned short)atoi(strings[i]->c_str());
  183 + }
  184 +};
  185 +/* ################################################################### */
  186 +static void ToS2(std::string ** strings, short * out, int n) {
  187 + for(int i=0; i < n; ++i) {
  188 + out[i] = (short)atoi(strings[i]->c_str());
  189 + }
  190 +};
  191 +/* ################################################################### */
  192 +static void ToS4(std::string ** strings, int * out, int n) {
  193 + for(int i=0; i < n; ++i) {
  194 + out[i] = atoi(strings[i]->c_str());
  195 + }
  196 +};
  197 +/* ################################################################### */
  198 +static void ToU4(std::string ** strings, unsigned int * out, int n) {
  199 + for(int i=0; i < n; ++i) {
  200 + out[i] = (unsigned int)strtoul(strings[i]->c_str(), NULL, 0);
  201 + }
  202 +};
  203 +/* ################################################################### */
  204 +static void ToS8(std::string ** strings, long long * out, int n) {
  205 + for(int i=0; i < n; ++i) {
  206 + std::stringstream ss(strings[i]->c_str());
  207 + ss >> out[i];
  208 + }
  209 +};
  210 +/* ################################################################### */
  211 +static void ToU8(std::string ** strings, unsigned long long * out, int n) {
  212 + for(int i=0; i < n; ++i) {
  213 + std::stringstream ss(strings[i]->c_str());
  214 + ss >> out[i];
  215 + }
  216 +};
  217 +/* ################################################################### */
  218 +static void ToF(std::string ** strings, float * out, int n) {
  219 + for(int i=0; i < n; ++i) {
  220 + out[i] = (float)atof(strings[i]->c_str());
  221 + }
  222 +};
  223 +/* ################################################################### */
  224 +static void ToD(std::string ** strings, double * out, int n) {
  225 + for(int i=0; i < n; ++i) {
  226 + out[i] = (double)atof(strings[i]->c_str());
  227 + }
  228 +};
  229 +/* ################################################################### */
  230 +static void StringsToInts(std::vector<std::string> & strings, std::vector<int> & out) {
  231 + for(int i=0; i < strings.size(); ++i) {
  232 + out.push_back(atoi(strings[i].c_str()));
  233 + }
  234 +};
  235 +/* ################################################################### */
  236 +static void StringsToInts(std::vector<std::string*> * strings, std::vector<int> * out) {
  237 + for(int i=0; i < strings->size(); ++i) {
  238 + out->push_back(atoi(strings->at(i)->c_str()));
  239 + }
  240 +};
  241 +/* ################################################################### */
  242 +static void StringsToLongs(std::vector<std::string> & strings, std::vector<long> & out) {
  243 + for(int i=0; i < strings.size(); ++i) {
  244 + out.push_back(atol(strings[i].c_str()));
  245 + }
  246 +};
  247 +/* ################################################################### */
  248 +static void StringsToLongs(std::vector<std::string*> * strings, std::vector<long> * out) {
  249 + for(int i=0; i < strings->size(); ++i) {
  250 + out->push_back(atol(strings->at(i)->c_str()));
  251 + }
  252 +};
  253 +/* ################################################################### */
  254 +static void StringsToULongs(std::vector<std::string> & strings, std::vector<unsigned long> & out) {
  255 + for(int i=0; i < strings.size(); ++i) {
  256 + out.push_back(strtoul(strings[i].c_str(),0,0));
  257 + }
  258 +};
  259 +/* ################################################################### */
  260 +static void StringsToULongs(std::vector<std::string*> * strings, std::vector<unsigned long> * out) {
  261 + for(int i=0; i < strings->size(); ++i) {
  262 + out->push_back(strtoul(strings->at(i)->c_str(),0,0));
  263 + }
  264 +};
  265 +/* ################################################################### */
  266 +static void StringsToFloats(std::vector<std::string> & strings, std::vector<float> & out) {
  267 + for(int i=0; i < strings.size(); ++i) {
  268 + out.push_back(atof(strings[i].c_str()));
  269 + }
  270 +};
  271 +/* ################################################################### */
  272 +static void StringsToFloats(std::vector<std::string*> * strings, std::vector<float> * out) {
  273 + for(int i=0; i < strings->size(); ++i) {
  274 + out->push_back(atof(strings->at(i)->c_str()));
  275 + }
  276 +};
  277 +/* ################################################################### */
  278 +static void StringsToDoubles(std::vector<std::string> & strings, std::vector<double> & out) {
  279 + for(int i=0; i < strings.size(); ++i) {
  280 + out.push_back(atof(strings[i].c_str()));
  281 + }
  282 +};
  283 +/* ################################################################### */
  284 +static void StringsToDoubles(std::vector<std::string*> * strings, std::vector<double> * out) {
  285 + for(int i=0; i < strings->size(); ++i) {
  286 + out->push_back(atof(strings->at(i)->c_str()));
  287 + }
  288 +};
  289 +/* ################################################################### */
  290 +static void StringsToStrings(std::vector<std::string*> * strings, std::vector<std::string> * out) {
  291 + for(int i=0; i < strings->size(); ++i) {
  292 + out->push_back( *strings->at(i) );
  293 + }
  294 +};
  295 +/* ################################################################### */
  296 +static void ToLowerASCII(std::string & s) {
  297 + int n = s.size();
  298 + int i=0;
  299 + char c;
  300 + for(; i < n; ++i) {
  301 + c = s[i];
  302 + if(c<='Z' && c>='A')
  303 + s[i] = c+32;
  304 + }
  305 +}
  306 +/* ################################################################### */
  307 +static char** CommandLineToArgvA(char* CmdLine, int* _argc) {
  308 + char** argv;
  309 + char* _argv;
  310 + unsigned long len;
  311 + unsigned long argc;
  312 + char a;
  313 + unsigned long i, j;
  314 +
  315 + bool in_QM;
  316 + bool in_TEXT;
  317 + bool in_SPACE;
  318 +
  319 + len = strlen(CmdLine);
  320 + i = ((len+2)/2)*sizeof(void*) + sizeof(void*);
  321 +
  322 + argv = (char**)malloc(i + (len+2)*sizeof(char));
  323 +
  324 + _argv = (char*)(((unsigned char*)argv)+i);
  325 +
  326 + argc = 0;
  327 + argv[argc] = _argv;
  328 + in_QM = false;
  329 + in_TEXT = false;
  330 + in_SPACE = true;
  331 + i = 0;
  332 + j = 0;
  333 +
  334 + while( a = CmdLine[i] ) {
  335 + if(in_QM) {
  336 + if( (a == '\"') ||
  337 + (a == '\'')) // rsz. Added single quote.
  338 + {
  339 + in_QM = false;
  340 + } else {
  341 + _argv[j] = a;
  342 + j++;
  343 + }
  344 + } else {
  345 + switch(a) {
  346 + case '\"':
  347 + case '\'': // rsz. Added single quote.
  348 + in_QM = true;
  349 + in_TEXT = true;
  350 + if(in_SPACE) {
  351 + argv[argc] = _argv+j;
  352 + argc++;
  353 + }
  354 + in_SPACE = false;
  355 + break;
  356 + case ' ':
  357 + case '\t':
  358 + case '\n':
  359 + case '\r':
  360 + if(in_TEXT) {
  361 + _argv[j] = '\0';
  362 + j++;
  363 + }
  364 + in_TEXT = false;
  365 + in_SPACE = true;
  366 + break;
  367 + default:
  368 + in_TEXT = true;
  369 + if(in_SPACE) {
  370 + argv[argc] = _argv+j;
  371 + argc++;
  372 + }
  373 + _argv[j] = a;
  374 + j++;
  375 + in_SPACE = false;
  376 + break;
  377 + }
  378 + }
  379 + i++;
  380 + }
  381 + _argv[j] = '\0';
  382 + argv[argc] = NULL;
  383 +
  384 + (*_argc) = argc;
  385 + return argv;
  386 +};
  387 +/* ################################################################### */
  388 +// Create unique ids with static and still allow single header that avoids multiple definitions linker error.
  389 +class ezOptionParserIDGenerator {
  390 +public:
  391 + static ezOptionParserIDGenerator& instance () { static ezOptionParserIDGenerator Generator; return Generator; }
  392 + short next () { return ++_id; }
  393 +private:
  394 + ezOptionParserIDGenerator() : _id(-1) {}
  395 + short _id;
  396 +};
  397 +/* ################################################################### */
  398 +/* Validate a value by checking:
  399 +- if given as a string, whether the converted value is within the datatype's limits,
  400 +- and whether it falls within a desired range,
  401 +- or whether it is within a given list of values.
  402 +
  403 +If comparing with a range, the values list must contain one or two values. One value is required when comparing with <, <=, >, >=. Use two values when requiring a test such as <x<, <=x<, <x<=, <=x<=.
  404 +A regcomp/regexec based class could be created in the future if a need arises.
  405 +*/
  406 +class ezOptionValidator {
  407 +public:
  408 + inline ezOptionValidator(const char* _type, const char* _op=0, const char* list=0, bool _insensitive=false);
  409 + inline ezOptionValidator(char _type);
  410 + inline ezOptionValidator(char _type, char _op, const char* list, int _size);
  411 + inline ezOptionValidator(char _type, char _op, const unsigned char* list, int _size);
  412 + inline ezOptionValidator(char _type, char _op, const short* list, int _size);
  413 + inline ezOptionValidator(char _type, char _op, const unsigned short* list, int _size);
  414 + inline ezOptionValidator(char _type, char _op, const int* list, int _size);
  415 + inline ezOptionValidator(char _type, char _op, const unsigned int* list, int _size);
  416 + inline ezOptionValidator(char _type, char _op, const long long* list, int _size);
  417 + inline ezOptionValidator(char _type, char _op, const unsigned long long* list, int _size=0);
  418 + inline ezOptionValidator(char _type, char _op, const float* list, int _size);
  419 + inline ezOptionValidator(char _type, char _op, const double* list, int _size);
  420 + inline ezOptionValidator(char _type, char _op, const char** list, int _size, bool _insensitive);
  421 + inline ~ezOptionValidator();
  422 +
  423 + inline bool isValid(const std::string * value);
  424 + inline void print();
  425 + inline void reset();
  426 +
  427 + /* If value must be in custom range, use these comparison modes. */
  428 + enum OP { NOOP=0,
  429 + LT, /* value < list[0] */
  430 + LE, /* value <= list[0] */
  431 + GT, /* value > list[0] */
  432 + GE, /* value >= list[0] */
  433 + GTLT, /* list[0] < value < list[1] */
  434 + GELT, /* list[0] <= value < list[1] */
  435 + GELE, /* list[0] <= value <= list[1] */
  436 + GTLE, /* list[0] < value <= list[1] */
  437 + IN /* if value is in list */
  438 + };
  439 +
  440 + enum TYPE { NOTYPE=0, S1, U1, S2, U2, S4, U4, S8, U8, F, D, T };
  441 + enum TYPE2 { NOTYPE2=0, INT8, UINT8, INT16, UINT16, INT32, UINT32, INT64, UINT64, FLOAT, DOUBLE, TEXT };
  442 +
  443 + union {
  444 + unsigned char *u1;
  445 + char *s1;
  446 + unsigned short *u2;
  447 + short *s2;
  448 + unsigned int *u4;
  449 + int *s4;
  450 + unsigned long long *u8;
  451 + long long *s8;
  452 + float *f;
  453 + double *d;
  454 + std::string** t;
  455 + };
  456 +
  457 + char op;
  458 + bool quiet;
  459 + short id;
  460 + char type;
  461 + int size;
  462 + bool insensitive;
  463 +};
  464 +/* ------------------------------------------------------------------- */
  465 +ezOptionValidator::~ezOptionValidator() {
  466 + reset();
  467 +};
  468 +/* ------------------------------------------------------------------- */
  469 +void ezOptionValidator::reset() {
  470 + #define CLEAR(TYPE,P) case TYPE: if (P) delete [] P; P = 0; break;
  471 + switch(type) {
  472 + CLEAR(S1,s1);
  473 + CLEAR(U1,u1);
  474 + CLEAR(S2,s2);
  475 + CLEAR(U2,u2);
  476 + CLEAR(S4,s4);
  477 + CLEAR(U4,u4);
  478 + CLEAR(S8,s8);
  479 + CLEAR(U8,u8);
  480 + CLEAR(F,f);
  481 + CLEAR(D,d);
  482 + case T:
  483 + for(int i=0; i < size; ++i)
  484 + delete t[i];
  485 +
  486 + delete [] t;
  487 + t = 0;
  488 + break;
  489 + default: break;
  490 + }
  491 +
  492 + size = 0;
  493 + op = NOOP;
  494 + type = NOTYPE;
  495 +};
  496 +/* ------------------------------------------------------------------- */
  497 +ezOptionValidator::ezOptionValidator(char _type) : insensitive(0), op(0), size(0), s1(0), type(_type), quiet(0) {
  498 + id = ezOptionParserIDGenerator::instance().next();
  499 +};
  500 +/* ------------------------------------------------------------------- */
  501 +ezOptionValidator::ezOptionValidator(char _type, char _op, const char* list, int _size) : insensitive(0), op(_op), size(_size), s1(0), type(_type), quiet(0) {
  502 + id = ezOptionParserIDGenerator::instance().next();
  503 + s1 = new char[size];
  504 + memcpy(s1, list, size);
  505 +};
  506 +/* ------------------------------------------------------------------- */
  507 +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned char* list, int _size) : insensitive(0), op(_op), size(_size), u1(0), type(_type), quiet(0) {
  508 + id = ezOptionParserIDGenerator::instance().next();
  509 + u1 = new unsigned char[size];
  510 + memcpy(u1, list, size);
  511 +};
  512 +/* ------------------------------------------------------------------- */
  513 +ezOptionValidator::ezOptionValidator(char _type, char _op, const short* list, int _size) : insensitive(0), op(_op), size(_size), s2(0), type(_type), quiet(0) {
  514 + id = ezOptionParserIDGenerator::instance().next();
  515 + s2 = new short[size];
  516 + memcpy(s2, list, size*sizeof(short));
  517 +};
  518 +/* ------------------------------------------------------------------- */
  519 +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned short* list, int _size) : insensitive(0), op(_op), size(_size), u2(0), type(_type), quiet(0) {
  520 + id = ezOptionParserIDGenerator::instance().next();
  521 + u2 = new unsigned short[size];
  522 + memcpy(u2, list, size*sizeof(unsigned short));
  523 +};
  524 +/* ------------------------------------------------------------------- */
  525 +ezOptionValidator::ezOptionValidator(char _type, char _op, const int* list, int _size) : insensitive(0), op(_op), size(_size), s4(0), type(_type), quiet(0) {
  526 + id = ezOptionParserIDGenerator::instance().next();
  527 + s4 = new int[size];
  528 + memcpy(s4, list, size*sizeof(int));
  529 +};
  530 +/* ------------------------------------------------------------------- */
  531 +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned int* list, int _size) : insensitive(0), op(_op), size(_size), u4(0), type(_type), quiet(0) {
  532 + id = ezOptionParserIDGenerator::instance().next();
  533 + u4 = new unsigned int[size];
  534 + memcpy(u4, list, size*sizeof(unsigned int));
  535 +};
  536 +/* ------------------------------------------------------------------- */
  537 +ezOptionValidator::ezOptionValidator(char _type, char _op, const long long* list, int _size) : insensitive(0), op(_op), size(_size), s8(0), type(_type), quiet(0) {
  538 + id = ezOptionParserIDGenerator::instance().next();
  539 + s8 = new long long[size];
  540 + memcpy(s8, list, size*sizeof(long long));
  541 +};
  542 +/* ------------------------------------------------------------------- */
  543 +ezOptionValidator::ezOptionValidator(char _type, char _op, const unsigned long long* list, int _size) : insensitive(0), op(_op), size(_size), u8(0), type(_type), quiet(0) {
  544 + id = ezOptionParserIDGenerator::instance().next();
  545 + u8 = new unsigned long long[size];
  546 + memcpy(u8, list, size*sizeof(unsigned long long));
  547 +};
  548 +/* ------------------------------------------------------------------- */
  549 +ezOptionValidator::ezOptionValidator(char _type, char _op, const float* list, int _size) : insensitive(0), op(_op), size(_size), f(0), type(_type), quiet(0) {
  550 + id = ezOptionParserIDGenerator::instance().next();
  551 + f = new float[size];
  552 + memcpy(f, list, size*sizeof(float));
  553 +};
  554 +/* ------------------------------------------------------------------- */
  555 +ezOptionValidator::ezOptionValidator(char _type, char _op, const double* list, int _size) : insensitive(0), op(_op), size(_size), d(0), type(_type), quiet(0) {
  556 + id = ezOptionParserIDGenerator::instance().next();
  557 + d = new double[size];
  558 + memcpy(d, list, size*sizeof(double));
  559 +};
  560 +/* ------------------------------------------------------------------- */
  561 +ezOptionValidator::ezOptionValidator(char _type, char _op, const char** list, int _size, bool _insensitive) : insensitive(_insensitive), op(_op), size(_size), t(0), type(_type), quiet(0) {
  562 + id = ezOptionParserIDGenerator::instance().next();
  563 + t = new std::string*[size];
  564 + int i=0;
  565 +
  566 + for(; i < size; ++i) {
  567 + t[i] = new std::string(list[i]);
  568 + }
  569 +};
  570 +/* ------------------------------------------------------------------- */
  571 +/* Less efficient but convenient ctor that parses strings to set up the validator.
  572 +_type: s1, u1, s2, u2, ..., f, d, t
  573 +_op: lt, gt, ..., in
  574 +_list: comma-delimited string
  575 +*/
  576 +ezOptionValidator::ezOptionValidator(const char* _type, const char* _op, const char* _list, bool _insensitive) : insensitive(_insensitive), size(0), t(0), type(0), quiet(0) {
  577 + id = ezOptionParserIDGenerator::instance().next();
  578 +
  579 + switch(_type[0]) {
  580 + case 'u':
  581 + switch(_type[1]) {
  582 + case '1': type = U1; break;
  583 + case '2': type = U2; break;
  584 + case '4': type = U4; break;
  585 + case '8': type = U8; break;
  586 + default: break;
  587 + }
  588 + break;
  589 + case 's':
  590 + switch(_type[1]) {
  591 + case '1': type = S1;
  592 + break;
  593 + case '2': type = S2; break;
  594 + case '4': type = S4; break;
  595 + case '8': type = S8; break;
  596 + default: break;
  597 + }
  598 + break;
  599 + case 'f': type = F; break;
  600 + case 'd': type = D; break;
  601 + case 't': type = T; break;
  602 + default:
  603 + if (!quiet)
  604 + std::cerr << "ERROR: Unknown validator datatype \"" << _type << "\".\n";
  605 + break;
  606 + }
  607 +
  608 + int nop = 0;
  609 + if (_op != 0)
  610 + nop = strlen(_op);
  611 +
  612 + switch(nop) {
  613 + case 0: op = NOOP; break;
  614 + case 2:
  615 + switch(_op[0]) {
  616 + case 'g':
  617 + switch(_op[1]) {
  618 + case 'e': op = GE; break;
  619 + default: op = GT; break;
  620 + }
  621 + break;
  622 + case 'i': op = IN;
  623 + break;
  624 + default:
  625 + switch(_op[1]) {
  626 + case 'e': op = LE; break;
  627 + default: op = LT; break;
  628 + }
  629 + break;
  630 + }
  631 + break;
  632 + case 4:
  633 + switch(_op[1]) {
  634 + case 'e':
  635 + switch(_op[3]) {
  636 + case 'e': op = GELE; break;
  637 + default: op = GELT; break;
  638 + }
  639 + break;
  640 + default:
  641 + switch(_op[3]) {
  642 + case 'e': op = GTLE; break;
  643 + default: op = GTLT; break;
  644 + }
  645 + break;
  646 + }
  647 + break;
  648 + default:
  649 + if (!quiet)
  650 + std::cerr << "ERROR: Unknown validator operation \"" << _op << "\".\n";
  651 + break;
  652 + }
  653 +
  654 + if (_list == 0) return;
  655 + // Create list of strings and then cast to native datatypes.
  656 + std::string unsplit(_list);
  657 + std::list<std::string*> split;
  658 + std::list<std::string*>::iterator it;
  659 + SplitDelim(unsplit, ',', split);
  660 + size = split.size();
  661 + std::string **strings = new std::string*[size];
  662 +
  663 + int i = 0;
  664 + for(it = split.begin(); it != split.end(); ++it)
  665 + strings[i++] = *it;
  666 +
  667 + if (insensitive)
  668 + for(i=0; i < size; ++i)
  669 + ToLowerASCII(*strings[i]);
  670 +
  671 + #define FreeStrings() { \
  672 + for(i=0; i < size; ++i)\
  673 + delete strings[i];\
  674 + delete [] strings;\
  675 + }
  676 +
  677 + #define ToArray(T,P,Y) case T: P = new Y[size]; To##T(strings, P, size); FreeStrings(); break;
  678 + switch(type) {
  679 + ToArray(S1,s1,char);
  680 + ToArray(U1,u1,unsigned char);
  681 + ToArray(S2,s2,short);
  682 + ToArray(U2,u2,unsigned short);
  683 + ToArray(S4,s4,int);
  684 + ToArray(U4,u4,unsigned int);
  685 + ToArray(S8,s8,long long);
  686 + ToArray(U8,u8,unsigned long long);
  687 + ToArray(F,f,float);
  688 + ToArray(D,d,double);
  689 + case T: t = strings; break; /* Don't erase strings array. */
  690 + default: break;
  691 + }
  692 +};
  693 +/* ------------------------------------------------------------------- */
  694 +void ezOptionValidator::print() {
  695 + printf("id=%d, op=%d, type=%d, size=%d, insensitive=%d\n", id, op, type, size, insensitive);
  696 +};
  697 +/* ------------------------------------------------------------------- */
  698 +bool ezOptionValidator::isValid(const std::string * valueAsString) {
  699 + if (valueAsString == 0) return false;
  700 +
  701 +#define CHECKRANGE(E,T) {\
  702 + std::stringstream ss(valueAsString->c_str()); \
  703 + long long E##value; \
  704 + ss >> E##value; \
  705 + long long E##min = static_cast<long long>(std::numeric_limits<T>::min()); \
  706 + if (E##value < E##min) { \
  707 + if (!quiet) \
  708 + std::cerr << "ERROR: Invalid value " << E##value << " is less than datatype min " << E##min << ".\n"; \
  709 + return false; \
  710 + } \
  711 + \
  712 + long long E##max = static_cast<long long>(std::numeric_limits<T>::max()); \
  713 + if (E##value > E##max) { \
  714 + if (!quiet) \
  715 + std::cerr << "ERROR: Invalid value " << E##value << " is greater than datatype max " << E##max << ".\n"; \
  716 + return false; \
  717 + } \
  718 +}
  719 + // Check if within datatype limits.
  720 + if (type != T) {
  721 + switch(type) {
  722 + case S1: CHECKRANGE(S1,char); break;
  723 + case U1: CHECKRANGE(U1,unsigned char); break;
  724 + case S2: CHECKRANGE(S2,short); break;
  725 + case U2: CHECKRANGE(U2,unsigned short); break;
  726 + case S4: CHECKRANGE(S4,int); break;
  727 + case U4: CHECKRANGE(U4,unsigned int); break;
  728 + case S8: {
  729 + if ( (valueAsString->at(0) == '-') &&
  730 + isdigit(valueAsString,1) &&
  731 + (valueAsString->size() > 19) &&
  732 + (valueAsString->compare(1, 19, "9223372036854775808") > 0) ) {
  733 + if (!quiet)
  734 + std::cerr << "ERROR: Invalid value " << *valueAsString << " is less than datatype min -9223372036854775808.\n";
  735 + return false;
  736 + }
  737 +
  738 + if (isdigit(valueAsString) &&
  739 + (valueAsString->size() > 18) &&
  740 + valueAsString->compare("9223372036854775807") > 0) {
  741 + if (!quiet)
  742 + std::cerr << "ERROR: Invalid value " << *valueAsString << " is greater than datatype max 9223372036854775807.\n";
  743 + return false;
  744 + }
  745 + } break;
  746 + case U8: {
  747 + if (valueAsString->compare("0") < 0) {
  748 + if (!quiet)
  749 + std::cerr << "ERROR: Invalid value " << *valueAsString << " is less than datatype min 0.\n";
  750 + return false;
  751 + }
  752 +
  753 + if (isdigit(valueAsString) &&
  754 + (valueAsString->size() > 19) &&
  755 + valueAsString->compare("18446744073709551615") > 0) {
  756 + if (!quiet)
  757 + std::cerr << "ERROR: Invalid value " << *valueAsString << " is greater than datatype max 18446744073709551615.\n";
  758 + return false;
  759 + }
  760 + } break;
  761 + case F: {
  762 + double dmax = static_cast<double>(std::numeric_limits<float>::max());
  763 + double dvalue = atof(valueAsString->c_str());
  764 + double dmin = -dmax;
  765 + if (dvalue < dmin) {
  766 + if (!quiet) {
  767 + fprintf(stderr, "ERROR: Invalid value %g is less than datatype min %g.\n", dvalue, dmin);
  768 + }
  769 + return false;
  770 + }
  771 +
  772 + if (dvalue > dmax) {
  773 + if (!quiet)
  774 + std::cerr << "ERROR: Invalid value " << dvalue << " is greater than datatype max " << dmax << ".\n";
  775 + return false;
  776 + }
  777 + } break;
  778 + case D: {
  779 + long double ldmax = static_cast<long double>(std::numeric_limits<double>::max());
  780 + std::stringstream ss(valueAsString->c_str());
  781 + long double ldvalue;
  782 + ss >> ldvalue;
  783 + long double ldmin = -ldmax;
  784 +
  785 + if (ldvalue < ldmin) {
  786 + if (!quiet)
  787 + std::cerr << "ERROR: Invalid value " << ldvalue << " is less than datatype min " << ldmin << ".\n";
  788 + return false;
  789 + }
  790 +
  791 + if (ldvalue > ldmax) {
  792 + if (!quiet)
  793 + std::cerr << "ERROR: Invalid value " << ldvalue << " is greater than datatype max " << ldmax << ".\n";
  794 + return false;
  795 + }
  796 + } break;
  797 + case NOTYPE: default: break;
  798 + }
  799 + } else {
  800 + if (op == IN) {
  801 + int i=0;
  802 + if (insensitive) {
  803 + std::string valueAsStringLower(*valueAsString);
  804 + ToLowerASCII(valueAsStringLower);
  805 + for(; i < size; ++i) {
  806 + if (valueAsStringLower.compare(t[i]->c_str()) == 0)
  807 + return true;
  808 + }
  809 + } else {
  810 + for(; i < size; ++i) {
  811 + if (valueAsString->compare(t[i]->c_str()) == 0)
  812 + return true;
  813 + }
  814 + }
  815 + return false;
  816 + }
  817 + }
  818 +
  819 + // Only check datatype limits, and return;
  820 + if (op == NOOP) return true;
  821 +
  822 +#define VALIDATE(T, U, LIST) { \
  823 + /* Value string converted to true native type. */ \
  824 + std::stringstream ss(valueAsString->c_str());\
  825 + U v;\
  826 + ss >> v;\
  827 + /* Check if within list. */ \
  828 + if (op == IN) { \
  829 + T * last = LIST + size;\
  830 + return (last != std::find(LIST, last, v)); \
  831 + } \
  832 + \
  833 + /* Check if within user's custom range. */ \
  834 + T v0, v1; \
  835 + if (size > 0) { \
  836 + v0 = LIST[0]; \
  837 + } \
  838 + \
  839 + if (size > 1) { \
  840 + v1 = LIST[1]; \
  841 + } \
  842 + \
  843 + switch (op) {\
  844 + case LT:\
  845 + if (size > 0) {\
  846 + return v < v0;\
  847 + } else {\
  848 + std::cerr << "ERROR: No value given to validate if " << v << " < X.\n";\
  849 + return false;\
  850 + }\
  851 + break;\
  852 + case LE:\
  853 + if (size > 0) {\
  854 + return v <= v0;\
  855 + } else {\
  856 + std::cerr << "ERROR: No value given to validate if " << v << " <= X.\n";\
  857 + return false;\
  858 + }\
  859 + break;\
  860 + case GT:\
  861 + if (size > 0) {\
  862 + return v > v0;\
  863 + } else {\
  864 + std::cerr << "ERROR: No value given to validate if " << v << " > X.\n";\
  865 + return false;\
  866 + }\
  867 + break;\
  868 + case GE:\
  869 + if (size > 0) {\
  870 + return v >= v0;\
  871 + } else {\
  872 + std::cerr << "ERROR: No value given to validate if " << v << " >= X.\n";\
  873 + return false;\
  874 + }\
  875 + break;\
  876 + case GTLT:\
  877 + if (size > 1) {\
  878 + return (v0 < v) && (v < v1);\
  879 + } else {\
  880 + std::cerr << "ERROR: Missing values to validate if X1 < " << v << " < X2.\n";\
  881 + return false;\
  882 + }\
  883 + break;\
  884 + case GELT:\
  885 + if (size > 1) {\
  886 + return (v0 <= v) && (v < v1);\
  887 + } else {\
  888 + std::cerr << "ERROR: Missing values to validate if X1 <= " << v << " < X2.\n";\
  889 + return false;\
  890 + }\
  891 + break;\
  892 + case GELE:\
  893 + if (size > 1) {\
  894 + return (v0 <= v) && (v <= v1);\
  895 + } else {\
  896 + std::cerr << "ERROR: Missing values to validate if X1 <= " << v << " <= X2.\n";\
  897 + return false;\
  898 + }\
  899 + break;\
  900 + case GTLE:\
  901 + if (size > 1) {\
  902 + return (v0 < v) && (v <= v1);\
  903 + } else {\
  904 + std::cerr << "ERROR: Missing values to validate if X1 < " << v << " <= X2.\n";\
  905 + return false;\
  906 + }\
  907 + break;\
  908 + case NOOP: case IN: default: break;\
  909 + } \
  910 + }
  911 +
  912 + switch(type) {
  913 + case U1: VALIDATE(unsigned char, int, u1); break;
  914 + case S1: VALIDATE(char, int, s1); break;
  915 + case U2: VALIDATE(unsigned short, int, u2); break;
  916 + case S2: VALIDATE(short, int, s2); break;
  917 + case U4: VALIDATE(unsigned int, unsigned int, u4); break;
  918 + case S4: VALIDATE(int, int, s4); break;
  919 + case U8: VALIDATE(unsigned long long, unsigned long long, u8); break;
  920 + case S8: VALIDATE(long long, long long, s8); break;
  921 + case F: VALIDATE(float, float, f); break;
  922 + case D: VALIDATE(double, double, d); break;
  923 + default: break;
  924 + }
  925 +
  926 + return true;
  927 +};
  928 +/* ################################################################### */
/* One command-line option: its flags, help text, default value and the
   arguments collected for it while parsing.

   The group owns the heap-allocated strings in `flags` and `args` and frees
   them in its destructor.
   NOTE(review): no copy constructor or assignment operator is declared, so
   copying a group would share those pointers; pass groups by pointer. */
class OptionGroup {
public:
  // Initializers are listed in member declaration order.
  OptionGroup() : delim(0), expectArgs(0), isRequired(false), isSet(false) { }

  ~OptionGroup() {
    for(std::vector< std::string* >::size_type i = 0; i < flags.size(); ++i)
      delete flags[i];

    flags.clear();
    parseIndex.clear();
    clearArgs();
  }

  // Frees all collected arguments and marks the option as not set.
  inline void clearArgs();
  // Scalar getters: convert the first argument of the first flag instance,
  // or the default when the option was not set.
  inline void getInt(int&);
  inline void getLong(long&);
  inline void getLongLong(long long&);
  inline void getULong(unsigned long&);
  inline void getULongLong(unsigned long long&);
  inline void getFloat(float&);
  inline void getDouble(double&);
  inline void getString(std::string&);
  // List getters: convert the arguments of the first flag instance,
  // or the default split by `delim` when the option was not set.
  inline void getInts(std::vector<int>&);
  inline void getLongs(std::vector<long>&);
  inline void getULongs(std::vector<unsigned long>&);
  inline void getFloats(std::vector<float>&);
  inline void getDoubles(std::vector<double>&);
  inline void getStrings(std::vector<std::string>&);
  // Multi getters: one list per flag instance found on the command line.
  inline void getMultiInts(std::vector< std::vector<int> >&);
  inline void getMultiLongs(std::vector< std::vector<long> >&);
  inline void getMultiULongs(std::vector< std::vector<unsigned long> >&);
  inline void getMultiFloats(std::vector< std::vector<float> >&);
  inline void getMultiDoubles(std::vector< std::vector<double> >&);
  inline void getMultiStrings(std::vector< std::vector<std::string> >&);

  // defaults value regardless of being set by user.
  std::string defaults;
  // If expects arguments, this will delimit arg list.
  char delim;
  // If not 0, then number of delimited args. -1 for arbitrary number.
  int expectArgs;
  // Descriptive help message shown in usage instructions for option.
  std::string help;
  // Whether the option must be supplied by the user.
  bool isRequired;
  // A list of flags that denote this option, i.e. -d, --dimension.
  std::vector< std::string* > flags;
  // If was set (or found).
  bool isSet;
  // Lists of arguments, per flag instance, after splitting by delimiter.
  std::vector< std::vector< std::string* > * > args;
  // Index where each group was parsed from input stream to track order.
  std::vector<int> parseIndex;
};
  984 +/* ################################################################### */
  985 +void OptionGroup::clearArgs() {
  986 + int i,j;
  987 + for(i=0; i < args.size(); ++i) {
  988 + for(j=0; j < args[i]->size(); ++j)
  989 + delete args[i]->at(j);
  990 +
  991 + delete args[i];
  992 + }
  993 +
  994 + args.clear();
  995 + isSet = false;
  996 +};
  997 +/* ################################################################### */
  998 +void OptionGroup::getInt(int & out) {
  999 + if (!isSet) {
  1000 + if (defaults.empty())
  1001 + out = 0;
  1002 + else
  1003 + out = atoi(defaults.c_str());
  1004 + } else {
  1005 + if (args.empty() || args[0]->empty())
  1006 + out = 0;
  1007 + else {
  1008 + out = atoi(args[0]->at(0)->c_str());
  1009 + }
  1010 + }
  1011 +};
  1012 +/* ################################################################### */
  1013 +void OptionGroup::getLong(long & out) {
  1014 + if (!isSet) {
  1015 + if (defaults.empty())
  1016 + out = 0;
  1017 + else
  1018 + out = atoi(defaults.c_str());
  1019 + } else {
  1020 + if (args.empty() || args[0]->empty())
  1021 + out = 0;
  1022 + else {
  1023 + out = atol(args[0]->at(0)->c_str());
  1024 + }
  1025 + }
  1026 +};
  1027 +/* ################################################################### */
  1028 +void OptionGroup::getLongLong(long long & out) {
  1029 + if (!isSet) {
  1030 + if (defaults.empty())
  1031 + out = 0;
  1032 + else {
  1033 + std::stringstream ss(defaults.c_str());
  1034 + ss >> out;
  1035 + }
  1036 + } else {
  1037 + if (args.empty() || args[0]->empty())
  1038 + out = 0;
  1039 + else {
  1040 + std::stringstream ss(args[0]->at(0)->c_str());
  1041 + ss >> out;
  1042 + }
  1043 + }
  1044 +};
  1045 +/* ################################################################### */
  1046 +void OptionGroup::getULong(unsigned long & out) {
  1047 + if (!isSet) {
  1048 + if (defaults.empty())
  1049 + out = 0;
  1050 + else
  1051 + out = atoi(defaults.c_str());
  1052 + } else {
  1053 + if (args.empty() || args[0]->empty())
  1054 + out = 0;
  1055 + else {
  1056 + out = strtoul(args[0]->at(0)->c_str(),0,0);
  1057 + }
  1058 + }
  1059 +};
  1060 +/* ################################################################### */
  1061 +void OptionGroup::getULongLong(unsigned long long & out) {
  1062 + if (!isSet) {
  1063 + if (defaults.empty())
  1064 + out = 0;
  1065 + else {
  1066 + std::stringstream ss(defaults.c_str());
  1067 + ss >> out;
  1068 + }
  1069 + } else {
  1070 + if (args.empty() || args[0]->empty())
  1071 + out = 0;
  1072 + else {
  1073 + std::stringstream ss(args[0]->at(0)->c_str());
  1074 + ss >> out;
  1075 + }
  1076 + }
  1077 +};
  1078 +/* ################################################################### */
  1079 +void OptionGroup::getFloat(float & out) {
  1080 + if (!isSet) {
  1081 + if (defaults.empty())
  1082 + out = 0.0;
  1083 + else
  1084 + out = (float)atof(defaults.c_str());
  1085 + } else {
  1086 + if (args.empty() || args[0]->empty())
  1087 + out = 0.0;
  1088 + else {
  1089 + out = (float)atof(args[0]->at(0)->c_str());
  1090 + }
  1091 + }
  1092 +};
  1093 +/* ################################################################### */
  1094 +void OptionGroup::getDouble(double & out) {
  1095 + if (!isSet) {
  1096 + if (defaults.empty())
  1097 + out = 0.0;
  1098 + else
  1099 + out = atof(defaults.c_str());
  1100 + } else {
  1101 + if (args.empty() || args[0]->empty())
  1102 + out = 0.0;
  1103 + else {
  1104 + out = atof(args[0]->at(0)->c_str());
  1105 + }
  1106 + }
  1107 +};
  1108 +/* ################################################################### */
  1109 +void OptionGroup::getString(std::string & out) {
  1110 + if (!isSet) {
  1111 + out = defaults;
  1112 + } else {
  1113 + if (args.empty() || args[0]->empty())
  1114 + out = "";
  1115 + else {
  1116 + out = *args[0]->at(0);
  1117 + }
  1118 + }
  1119 +};
  1120 +/* ################################################################### */
  1121 +void OptionGroup::getInts(std::vector<int> & out) {
  1122 + if (!isSet) {
  1123 + if (!defaults.empty()) {
  1124 + std::vector< std::string > strings;
  1125 + SplitDelim(defaults, delim, strings);
  1126 + StringsToInts(strings, out);
  1127 + }
  1128 + } else {
  1129 + if (!(args.empty() || args[0]->empty()))
  1130 + StringsToInts(args[0], &out);
  1131 + }
  1132 +};
  1133 +/* ################################################################### */
  1134 +void OptionGroup::getLongs(std::vector<long> & out) {
  1135 + if (!isSet) {
  1136 + if (!defaults.empty()) {
  1137 + std::vector< std::string > strings;
  1138 + SplitDelim(defaults, delim, strings);
  1139 + StringsToLongs(strings, out);
  1140 + }
  1141 + } else {
  1142 + if (!(args.empty() || args[0]->empty()))
  1143 + StringsToLongs(args[0], &out);
  1144 + }
  1145 +};
  1146 +/* ################################################################### */
  1147 +void OptionGroup::getULongs(std::vector<unsigned long> & out) {
  1148 + if (!isSet) {
  1149 + if (!defaults.empty()) {
  1150 + std::vector< std::string > strings;
  1151 + SplitDelim(defaults, delim, strings);
  1152 + StringsToULongs(strings, out);
  1153 + }
  1154 + } else {
  1155 + if (!(args.empty() || args[0]->empty()))
  1156 + StringsToULongs(args[0], &out);
  1157 + }
  1158 +};
  1159 +/* ################################################################### */
  1160 +void OptionGroup::getFloats(std::vector<float> & out) {
  1161 + if (!isSet) {
  1162 + if (!defaults.empty()) {
  1163 + std::vector< std::string > strings;
  1164 + SplitDelim(defaults, delim, strings);
  1165 + StringsToFloats(strings, out);
  1166 + }
  1167 + } else {
  1168 + if (!(args.empty() || args[0]->empty()))
  1169 + StringsToFloats(args[0], &out);
  1170 + }
  1171 +};
  1172 +/* ################################################################### */
  1173 +void OptionGroup::getDoubles(std::vector<double> & out) {
  1174 + if (!isSet) {
  1175 + if (!defaults.empty()) {
  1176 + std::vector< std::string > strings;
  1177 + SplitDelim(defaults, delim, strings);
  1178 + StringsToDoubles(strings, out);
  1179 + }
  1180 + } else {
  1181 + if (!(args.empty() || args[0]->empty()))
  1182 + StringsToDoubles(args[0], &out);
  1183 + }
  1184 +};
  1185 +/* ################################################################### */
  1186 +void OptionGroup::getStrings(std::vector<std::string>& out) {
  1187 + if (!isSet) {
  1188 + if (!defaults.empty()) {
  1189 + SplitDelim(defaults, delim, out);
  1190 + }
  1191 + } else {
  1192 + if (!(args.empty() || args[0]->empty()))
  1193 + StringsToStrings(args[0], &out);
  1194 + }
  1195 +};
  1196 +/* ################################################################### */
  1197 +void OptionGroup::getMultiInts(std::vector< std::vector<int> >& out) {
  1198 + if (!isSet) {
  1199 + if (!defaults.empty()) {
  1200 + std::vector< std::string > strings;
  1201 + SplitDelim(defaults, delim, strings);
  1202 + if (out.size() < 1) out.resize(1);
  1203 + StringsToInts(strings, out[0]);
  1204 + }
  1205 + } else {
  1206 + if (!args.empty()) {
  1207 + int n = args.size();
  1208 + if (out.size() < n) out.resize(n);
  1209 + for(int i=0; i < n; ++i) {
  1210 + StringsToInts(args[i], &out[i]);
  1211 + }
  1212 + }
  1213 + }
  1214 +};
  1215 +/* ################################################################### */
  1216 +void OptionGroup::getMultiLongs(std::vector< std::vector<long> >& out) {
  1217 + if (!isSet) {
  1218 + if (!defaults.empty()) {
  1219 + std::vector< std::string > strings;
  1220 + SplitDelim(defaults, delim, strings);
  1221 + if (out.size() < 1) out.resize(1);
  1222 + StringsToLongs(strings, out[0]);
  1223 + }
  1224 + } else {
  1225 + if (!args.empty()) {
  1226 + int n = args.size();
  1227 + if (out.size() < n) out.resize(n);
  1228 + for(int i=0; i < n; ++i) {
  1229 + StringsToLongs(args[i], &out[i]);
  1230 + }
  1231 + }
  1232 + }
  1233 +};
  1234 +/* ################################################################### */
  1235 +void OptionGroup::getMultiULongs(std::vector< std::vector<unsigned long> >& out) {
  1236 + if (!isSet) {
  1237 + if (!defaults.empty()) {
  1238 + std::vector< std::string > strings;
  1239 + SplitDelim(defaults, delim, strings);
  1240 + if (out.size() < 1) out.resize(1);
  1241 + StringsToULongs(strings, out[0]);
  1242 + }
  1243 + } else {
  1244 + if (!args.empty()) {
  1245 + int n = args.size();
  1246 + if (out.size() < n) out.resize(n);
  1247 + for(int i=0; i < n; ++i) {
  1248 + StringsToULongs(args[i], &out[i]);
  1249 + }
  1250 + }
  1251 + }
  1252 +};
  1253 +/* ################################################################### */
  1254 +void OptionGroup::getMultiFloats(std::vector< std::vector<float> >& out) {
  1255 + if (!isSet) {
  1256 + if (!defaults.empty()) {
  1257 + std::vector< std::string > strings;
  1258 + SplitDelim(defaults, delim, strings);
  1259 + if (out.size() < 1) out.resize(1);
  1260 + StringsToFloats(strings, out[0]);
  1261 + }
  1262 + } else {
  1263 + if (!args.empty()) {
  1264 + int n = args.size();
  1265 + if (out.size() < n) out.resize(n);
  1266 + for(int i=0; i < n; ++i) {
  1267 + StringsToFloats(args[i], &out[i]);
  1268 + }
  1269 + }
  1270 + }
  1271 +};
  1272 +/* ################################################################### */
  1273 +void OptionGroup::getMultiDoubles(std::vector< std::vector<double> >& out) {
  1274 + if (!isSet) {
  1275 + if (!defaults.empty()) {
  1276 + std::vector< std::string > strings;
  1277 + SplitDelim(defaults, delim, strings);
  1278 + if (out.size() < 1) out.resize(1);
  1279 + StringsToDoubles(strings, out[0]);
  1280 + }
  1281 + } else {
  1282 + if (!args.empty()) {
  1283 + int n = args.size();
  1284 + if (out.size() < n) out.resize(n);
  1285 + for(int i=0; i < n; ++i) {
  1286 + StringsToDoubles(args[i], &out[i]);
  1287 + }
  1288 + }
  1289 + }
  1290 +};
  1291 +/* ################################################################### */
  1292 +void OptionGroup::getMultiStrings(std::vector< std::vector<std::string> >& out) {
  1293 + if (!isSet) {
  1294 + if (!defaults.empty()) {
  1295 + std::vector< std::string > strings;
  1296 + SplitDelim(defaults, delim, strings);
  1297 + if (out.size() < 1) out.resize(1);
  1298 + out[0] = strings;
  1299 + }
  1300 + } else {
  1301 + if (!args.empty()) {
  1302 + int n = args.size();
  1303 + if (out.size() < n) out.resize(n);
  1304 +
  1305 + for(int i=0; i < n; ++i) {
  1306 + for(int j=0; j < args[i]->size(); ++j)
  1307 + out[i].push_back( *args[i]->at(j) );
  1308 + }
  1309 + }
  1310 + }
  1311 +};
  1312 +/* ################################################################### */
  1313 +typedef std::map< int, ezOptionValidator* > ValidatorMap;
  1314 +
  1315 +class ezOptionParser {
  1316 +public:
  1317 + // How to layout usage descriptions with the option flags.
  1318 + enum Layout { ALIGN, INTERLEAVE, STAGGER };
  1319 +
  1320 + inline ~ezOptionParser();
  1321 +
  1322 + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, ezOptionValidator* validator=0);
  1323 + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, ezOptionValidator* validator=0);
  1324 + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, ezOptionValidator* validator=0);
  1325 + inline void add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, const char * flag4, ezOptionValidator* validator=0);
  1326 + inline bool exportFile(const char * filename, bool all=false);
  1327 + inline OptionGroup * get(const char * name);
  1328 + inline void getUsage(std::string & usage, int width=80, Layout layout=ALIGN);
  1329 + inline void getUsageDescriptions(std::string & usage, int width=80, Layout layout=STAGGER);
  1330 + inline bool gotExpected(std::vector<std::string> & badOptions);
  1331 + inline bool gotRequired(std::vector<std::string> & badOptions);
  1332 + inline bool gotValid(std::vector<std::string> & badOptions, std::vector<std::string> & badArgs);
  1333 + inline bool importFile(const char * filename, char comment='#');
  1334 + inline int isSet(const char * name);
  1335 + inline int isSet(std::string & name);
  1336 + inline void parse(int argc, const char * argv[]);
  1337 + inline void prettyPrint(std::string & out);
  1338 + inline void reset();
  1339 + inline void resetArgs();
  1340 +
  1341 + // Insert extra empty line betwee each option's usage description.
  1342 + char doublespace;
  1343 + // General description in human language on what the user's tool does.
  1344 + // It's the first section to get printed in the full usage message.
  1345 + std::string overview;
  1346 + // A synopsis of command and options usage to show expected order of input arguments.
  1347 + // It's the second section to get printed in the full usage message.
  1348 + std::string syntax;
  1349 + // Example (third) section in usage message.
  1350 + std::string example;
  1351 + // Final section printed in usage message. For contact, copyrights, version info.
  1352 + std::string footer;
  1353 + // Map from an option to an Id of its parent group.
  1354 + std::map< std::string, int > optionGroupIds;
  1355 + // Unordered collection of the option groups.
  1356 + std::vector< OptionGroup* > groups;
  1357 + // Store unexpected args in input.
  1358 + std::vector< std::string* > unknownArgs;
  1359 + // List of args that occur left-most before first option flag.
  1360 + std::vector< std::string* > firstArgs;
  1361 + // List of args that occur after last right-most option flag and its args.
  1362 + std::vector< std::string* > lastArgs;
  1363 + // List of validators.
  1364 + ValidatorMap validators;
  1365 + // Maps group id to a validator index into vector of validators. Validator index is -1 if there is no validator for group.
  1366 + std::map< int, int > groupValidators;
  1367 +};
  1368 +/* ################################################################### */
  1369 +ezOptionParser::~ezOptionParser() {
  1370 + reset();
  1371 +}
  1372 +/* ################################################################### */
  1373 +void ezOptionParser::reset() {
  1374 + this->doublespace = 1;
  1375 +
  1376 + int i;
  1377 + for(i=0; i < groups.size(); ++i)
  1378 + delete groups[i];
  1379 + groups.clear();
  1380 +
  1381 + for(i=0; i < unknownArgs.size(); ++i)
  1382 + delete unknownArgs[i];
  1383 + unknownArgs.clear();
  1384 +
  1385 + for(i=0; i < firstArgs.size(); ++i)
  1386 + delete firstArgs[i];
  1387 + firstArgs.clear();
  1388 +
  1389 + for(i=0; i < lastArgs.size(); ++i)
  1390 + delete lastArgs[i];
  1391 + lastArgs.clear();
  1392 +
  1393 + ValidatorMap::iterator it;
  1394 + for(it = validators.begin(); it != validators.end(); ++it)
  1395 + delete it->second;
  1396 +
  1397 + validators.clear();
  1398 + optionGroupIds.clear();
  1399 + groupValidators.clear();
  1400 +};
  1401 +/* ################################################################### */
  1402 +void ezOptionParser::resetArgs() {
  1403 + int i;
  1404 + for(i=0; i < groups.size(); ++i)
  1405 + groups[i]->clearArgs();
  1406 +
  1407 + for(i=0; i < unknownArgs.size(); ++i)
  1408 + delete unknownArgs[i];
  1409 + unknownArgs.clear();
  1410 +
  1411 + for(i=0; i < firstArgs.size(); ++i)
  1412 + delete firstArgs[i];
  1413 + firstArgs.clear();
  1414 +
  1415 + for(i=0; i < lastArgs.size(); ++i)
  1416 + delete lastArgs[i];
  1417 + lastArgs.clear();
  1418 +};
  1419 +/* ################################################################### */
  1420 +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, ezOptionValidator* validator) {
  1421 + int id = this->groups.size();
  1422 + OptionGroup * g = new OptionGroup;
  1423 + g->defaults = defaults;
  1424 + g->isRequired = required;
  1425 + g->expectArgs = expectArgs;
  1426 + g->delim = delim;
  1427 + g->isSet = 0;
  1428 + g->help = help;
  1429 + std::string *f1 = new std::string(flag1);
  1430 + g->flags.push_back( f1 );
  1431 + this->optionGroupIds[flag1] = id;
  1432 + this->groups.push_back(g);
  1433 +
  1434 + if (validator) {
  1435 + int vid = validator->id;
  1436 + validators[vid] = validator;
  1437 + groupValidators[id] = vid;
  1438 + } else {
  1439 + groupValidators[id] = -1;
  1440 + }
  1441 +};
  1442 +/* ################################################################### */
  1443 +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, ezOptionValidator* validator) {
  1444 + int id = this->groups.size();
  1445 + OptionGroup * g = new OptionGroup;
  1446 + g->defaults = defaults;
  1447 + g->isRequired = required;
  1448 + g->expectArgs = expectArgs;
  1449 + g->delim = delim;
  1450 + g->isSet = 0;
  1451 + g->help = help;
  1452 + std::string *f1 = new std::string(flag1);
  1453 + g->flags.push_back( f1 );
  1454 + std::string *f2 = new std::string(flag2);
  1455 + g->flags.push_back( f2 );
  1456 + this->optionGroupIds[flag1] = id;
  1457 + this->optionGroupIds[flag2] = id;
  1458 +
  1459 + this->groups.push_back(g);
  1460 +
  1461 + if (validator) {
  1462 + int vid = validator->id;
  1463 + validators[vid] = validator;
  1464 + groupValidators[id] = vid;
  1465 + } else {
  1466 + groupValidators[id] = -1;
  1467 + }
  1468 +};
  1469 +/* ################################################################### */
  1470 +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, ezOptionValidator* validator) {
  1471 + int id = this->groups.size();
  1472 + OptionGroup * g = new OptionGroup;
  1473 + g->defaults = defaults;
  1474 + g->isRequired = required;
  1475 + g->expectArgs = expectArgs;
  1476 + g->delim = delim;
  1477 + g->isSet = 0;
  1478 + g->help = help;
  1479 + std::string *f1 = new std::string(flag1);
  1480 + g->flags.push_back( f1 );
  1481 + std::string *f2 = new std::string(flag2);
  1482 + g->flags.push_back( f2 );
  1483 + std::string *f3 = new std::string(flag3);
  1484 + g->flags.push_back( f3 );
  1485 + this->optionGroupIds[flag1] = id;
  1486 + this->optionGroupIds[flag2] = id;
  1487 + this->optionGroupIds[flag3] = id;
  1488 +
  1489 + this->groups.push_back(g);
  1490 +
  1491 + if (validator) {
  1492 + int vid = validator->id;
  1493 + validators[vid] = validator;
  1494 + groupValidators[id] = vid;
  1495 + } else {
  1496 + groupValidators[id] = -1;
  1497 + }
  1498 +};
  1499 +/* ################################################################### */
  1500 +void ezOptionParser::add(const char * defaults, bool required, int expectArgs, char delim, const char * help, const char * flag1, const char * flag2, const char * flag3, const char * flag4, ezOptionValidator* validator) {
  1501 + int id = this->groups.size();
  1502 + OptionGroup * g = new OptionGroup;
  1503 + g->defaults = defaults;
  1504 + g->isRequired = required;
  1505 + g->expectArgs = expectArgs;
  1506 + g->delim = delim;
  1507 + g->isSet = 0;
  1508 + g->help = help;
  1509 + std::string *f1 = new std::string(flag1);
  1510 + g->flags.push_back( f1 );
  1511 + std::string *f2 = new std::string(flag2);
  1512 + g->flags.push_back( f2 );
  1513 + std::string *f3 = new std::string(flag3);
  1514 + g->flags.push_back( f3 );
  1515 + std::string *f4 = new std::string(flag4);
  1516 + g->flags.push_back( f4 );
  1517 + this->optionGroupIds[flag1] = id;
  1518 + this->optionGroupIds[flag2] = id;
  1519 + this->optionGroupIds[flag3] = id;
  1520 + this->optionGroupIds[flag4] = id;
  1521 +
  1522 + this->groups.push_back(g);
  1523 +
  1524 + if (validator) {
  1525 + int vid = validator->id;
  1526 + validators[vid] = validator;
  1527 + groupValidators[id] = vid;
  1528 + } else {
  1529 + groupValidators[id] = -1;
  1530 + }
  1531 +};
  1532 +/* ################################################################### */
  1533 +bool ezOptionParser::exportFile(const char * filename, bool all) {
  1534 + int i;
  1535 + std::string out;
  1536 + bool quote;
  1537 +
  1538 + // Export the first args, except the program name, so start from 1.
  1539 + for(i=1; i < firstArgs.size(); ++i) {
  1540 + quote = ((firstArgs[i]->find_first_of(" \t") != std::string::npos) && (firstArgs[i]->find_first_of("\'\"") == std::string::npos));
  1541 +
  1542 + if (quote)
  1543 + out.append("\"");
  1544 +
  1545 + out.append(*firstArgs[i]);
  1546 + if (quote)
  1547 + out.append("\"");
  1548 +
  1549 + out.append(" ");
  1550 + }
  1551 +
  1552 + if (firstArgs.size() > 1)
  1553 + out.append("\n");
  1554 +
  1555 + std::vector<std::string* > stringPtrs(groups.size());
  1556 + int j,m;
  1557 + int n = groups.size();
  1558 + for(i=0; i < n; ++i) {
  1559 + stringPtrs[i] = groups[i]->flags[0];
  1560 + }
  1561 +
  1562 + OptionGroup *g;
  1563 + // Sort first flag of each group with other groups.
  1564 + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr);
  1565 + for(i=0; i < n; ++i) {
  1566 + g = get(stringPtrs[i]->c_str());
  1567 + if (g->isSet || all) {
  1568 + if (!g->isSet || g->args.empty()) {
  1569 + if (!g->defaults.empty()) {
  1570 + out.append(*stringPtrs[i]);
  1571 + out.append(" ");
  1572 + quote = ((g->defaults.find_first_of(" \t") != std::string::npos) && (g->defaults.find_first_of("\'\"") == std::string::npos));
  1573 + if (quote)
  1574 + out.append("\"");
  1575 +
  1576 + out.append(g->defaults);
  1577 + if (quote)
  1578 + out.append("\"");
  1579 +
  1580 + out.append("\n");
  1581 + }
  1582 + } else {
  1583 + int n = g->args.size();
  1584 + for(int j=0; j < n; ++j) {
  1585 + out.append(*stringPtrs[i]);
  1586 + out.append(" ");
  1587 + m = g->args[j]->size();
  1588 +
  1589 + for(int k=0; k < m; ++k) {
  1590 + quote = ( (*g->args[j]->at(k)).find_first_of(" \t") != std::string::npos );
  1591 + if (quote)
  1592 + out.append("\"");
  1593 +
  1594 + out.append(*g->args[j]->at(k));
  1595 + if (quote)
  1596 + out.append("\"");
  1597 +
  1598 + if ((g->delim) && ((k+1) != m))
  1599 + out.append(1,g->delim);
  1600 + }
  1601 + out.append("\n");
  1602 + }
  1603 + }
  1604 + }
  1605 + }
  1606 +
  1607 + // Export the last args.
  1608 + for(i=0; i < lastArgs.size(); ++i) {
  1609 + quote = ( lastArgs[i]->find_first_of(" \t") != std::string::npos );
  1610 + if (quote)
  1611 + out.append("\"");
  1612 +
  1613 + out.append(*lastArgs[i]);
  1614 + if (quote)
  1615 + out.append("\"");
  1616 +
  1617 + out.append(" ");
  1618 + }
  1619 +
  1620 + std::ofstream file(filename);
  1621 + if (!file.is_open())
  1622 + return false;
  1623 +
  1624 + file << out;
  1625 + file.close();
  1626 +
  1627 + return true;
  1628 +};
  1629 +/* ################################################################### */
  1630 +// Does not overwrite current options.
  1631 +// Returns true if file was read successfully.
  1632 +// So if this is used before parsing CLI, then option values will reflect
  1633 +// this file, but if used after parsing CLI, then values will contain
  1634 +// both CLI values and file's values.
  1635 +//
  1636 +// Comment lines are allowed if prefixed with #.
  1637 +// Strings should be quoted as usual.
  1638 +bool ezOptionParser::importFile(const char * filename, char comment) {
  1639 + std::ifstream file (filename, std::ios::in | std::ios::ate);
  1640 + if (!file.is_open())
  1641 + return false;
  1642 +
  1643 + // Read entire file contents.
  1644 + std::ifstream::pos_type size = file.tellg();
  1645 + char * memblock = new char[(int)size+1]; // Add one for end of string.
  1646 + file.seekg (0, std::ios::beg);
  1647 + file.read (memblock, size);
  1648 + memblock[size] = '\0';
  1649 + file.close();
  1650 +
  1651 + // Find comment lines.
  1652 + std::list<std::string*> lines;
  1653 + std::string memblockstring(memblock);
  1654 + delete[] memblock;
  1655 + SplitDelim(memblockstring, '\n', lines);
  1656 + int i,j,n;
  1657 + std::list<std::string*>::iterator iter;
  1658 + std::vector<int> sq, dq; // Single and double quote indices.
  1659 + std::vector<int>::iterator lo; // For searching quote indices.
  1660 + size_t pos;
  1661 + const char *str;
  1662 + std::string *line;
  1663 + // Find all single and double quotes to correctly handle comment tokens.
  1664 + for(iter=lines.begin(); iter != lines.end(); ++iter) {
  1665 + line = *iter;
  1666 + str = line->c_str();
  1667 + n = line->size();
  1668 + sq.clear();
  1669 + dq.clear();
  1670 + if (n) {
  1671 + // If first char is comment, then erase line and continue.
  1672 + pos = line->find_first_not_of(" \t\r");
  1673 + if ((pos==std::string::npos) || (line->at(pos)==comment)) {
  1674 + line->erase();
  1675 + continue;
  1676 + } else {
  1677 + // Erase whitespace prefix.
  1678 + line->erase(0,pos);
  1679 + n = line->size();
  1680 + }
  1681 +
  1682 + if (line->at(0)=='"')
  1683 + dq.push_back(0);
  1684 +
  1685 + if (line->at(0)=='\'')
  1686 + sq.push_back(0);
  1687 + } else { // Empty line.
  1688 + continue;
  1689 + }
  1690 +
  1691 + for(i=1; i < n; ++i) {
  1692 + if ( (str[i]=='"') && (str[i-1]!='\\') )
  1693 + dq.push_back(i);
  1694 + else if ( (str[i]=='\'') && (str[i-1]!='\\') )
  1695 + sq.push_back(i);
  1696 + }
  1697 + // Scan for comments, and when found, check bounds of quotes.
  1698 + // Start with second char because already checked first char.
  1699 + for(i=1; i < n; ++i) {
  1700 + if ( (line->at(i)==comment) && (line->at(i-1)!='\\') ) {
  1701 + // If within open/close quote pair, then not real comment.
  1702 + if (sq.size()) {
  1703 + lo = std::lower_bound(sq.begin(), sq.end(), i);
  1704 + // All start of strings will be even indices, closing quotes is odd indices.
  1705 + j = (int)(lo-sq.begin());
  1706 + if ( (j % 2) == 0) { // Even implies comment char not in quote pair.
  1707 + // Erase from comment char to end of line.
  1708 + line->erase(i);
  1709 + break;
  1710 + }
  1711 + } else if (dq.size()) {
  1712 + // Repeat tests for double quotes.
  1713 + lo = std::lower_bound(dq.begin(), dq.end(), i);
  1714 + j = (int)(lo-dq.begin());
  1715 + if ( (j % 2) == 0) {
  1716 + line->erase(i);
  1717 + break;
  1718 + }
  1719 + } else {
  1720 + // Not in quotes.
  1721 + line->erase(i);
  1722 + break;
  1723 + }
  1724 + }
  1725 + }
  1726 + }
  1727 +
  1728 + std::string cmd;
  1729 + // Convert list to string without newlines to simulate commandline.
  1730 + for(iter=lines.begin(); iter != lines.end(); ++iter) {
  1731 + if (! (*iter)->empty()) {
  1732 + cmd.append(**iter);
  1733 + cmd.append(" ");
  1734 + }
  1735 + }
  1736 +
  1737 + // Now parse as if from command line.
  1738 + int argc=0;
  1739 + char** argv = CommandLineToArgvA((char*)cmd.c_str(), &argc);
  1740 +
  1741 + // Parse.
  1742 + parse(argc, (const char**)argv);
  1743 + if (argv) free(argv);
  1744 + for(iter=lines.begin(); iter != lines.end(); ++iter)
  1745 + delete *iter;
  1746 +
  1747 + return true;
  1748 +};
  1749 +/* ################################################################### */
  1750 +int ezOptionParser::isSet(const char * name) {
  1751 + std::string sname(name);
  1752 +
  1753 + if (this->optionGroupIds.count(sname)) {
  1754 + return this->groups[this->optionGroupIds[sname]]->isSet;
  1755 + }
  1756 +
  1757 + return 0;
  1758 +};
  1759 +/* ################################################################### */
  1760 +int ezOptionParser::isSet(std::string & name) {
  1761 + if (this->optionGroupIds.count(name)) {
  1762 + return this->groups[this->optionGroupIds[name]]->isSet;
  1763 + }
  1764 +
  1765 + return 0;
  1766 +};
  1767 +/* ################################################################### */
  1768 +OptionGroup * ezOptionParser::get(const char * name) {
  1769 + if (optionGroupIds.count(name)) {
  1770 + return groups[optionGroupIds[name]];
  1771 + }
  1772 +
  1773 + return 0;
  1774 +};
  1775 +/* ################################################################### */
  1776 +void ezOptionParser::getUsage(std::string & usage, int width, Layout layout) {
  1777 +
  1778 + usage.append(overview);
  1779 + usage.append("\n\n");
  1780 + usage.append("USAGE: ");
  1781 + usage.append(syntax);
  1782 + usage.append("\n\nOPTIONS:\n\n");
  1783 + getUsageDescriptions(usage, width, layout);
  1784 +
  1785 + if (!example.empty()) {
  1786 + usage.append("EXAMPLES:\n\n");
  1787 + usage.append(example);
  1788 + }
  1789 +
  1790 + if (!footer.empty()) {
  1791 + usage.append(footer);
  1792 + }
  1793 +};
  1794 +/* ################################################################### */
  1795 +// Creates 2 column formatted help descriptions for each option flag.
  1796 +void ezOptionParser::getUsageDescriptions(std::string & usage, int width, Layout layout) {
  1797 + // Sort each flag list amongst each group.
  1798 + int i;
  1799 + // Store index of flag groups before sort for easy lookup later.
  1800 + std::map<std::string*, int> stringPtrToIndexMap;
  1801 + std::vector<std::string* > stringPtrs(groups.size());
  1802 +
  1803 + for(i=0; i < groups.size(); ++i) {
  1804 + std::sort(groups[i]->flags.begin(), groups[i]->flags.end(), CmpOptStringPtr);
  1805 + stringPtrToIndexMap[groups[i]->flags[0]] = i;
  1806 + stringPtrs[i] = groups[i]->flags[0];
  1807 + }
  1808 +
  1809 + size_t j, k, n;
  1810 + std::string opts;
  1811 + std::vector<std::string> sortedOpts;
  1812 + // Sort first flag of each group with other groups.
  1813 + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr);
  1814 + for(i=0; i < groups.size(); ++i) {
  1815 + //printf("DEBUG:%d: %d %d %s\n", __LINE__, i, stringPtrToIndexMap[stringPtrs[i]], stringPtrs[i]->c_str());
  1816 + k = stringPtrToIndexMap[stringPtrs[i]];
  1817 + opts.clear();
  1818 + for(j=0; j < groups[k]->flags.size()-1; ++j) {
  1819 + opts.append(*groups[k]->flags[j]);
  1820 + opts.append(", ");
  1821 +
  1822 + if (opts.size() > width)
  1823 + opts.append("\n");
  1824 + }
  1825 + // The last flag. No need to append comma anymore.
  1826 + opts.append( *groups[k]->flags[j] );
  1827 +
  1828 + if (groups[k]->expectArgs) {
  1829 + opts.append(" ARG");
  1830 +
  1831 + if (groups[k]->delim) {
  1832 + opts.append("1[");
  1833 + opts.append(1, groups[k]->delim);
  1834 + opts.append("ARGn]");
  1835 + }
  1836 + }
  1837 +
  1838 + sortedOpts.push_back(opts);
  1839 + }
  1840 +
  1841 + // Each option group will use this to build multiline help description.
  1842 + std::list<std::string*> desc;
  1843 + // Number of whitespaces from start of line to description (interleave layout) or
  1844 + // gap between flag names and description (align, stagger layouts).
  1845 + int gutter = 3;
  1846 +
  1847 + // Find longest opt flag string to set column start for help usage descriptions.
  1848 + int maxlen=0;
  1849 + if (layout == ALIGN) {
  1850 + for(i=0; i < groups.size(); ++i) {
  1851 + if (maxlen < sortedOpts[i].size())
  1852 + maxlen = sortedOpts[i].size();
  1853 + }
  1854 + }
  1855 +
  1856 + // The amount of space remaining on a line for help text after flags.
  1857 + int helpwidth;
  1858 + std::list<std::string*>::iterator cIter, insertionIter;
  1859 + size_t pos;
  1860 + for(i=0; i < groups.size(); ++i) {
  1861 + k = stringPtrToIndexMap[stringPtrs[i]];
  1862 +
  1863 + if (layout == STAGGER)
  1864 + maxlen = sortedOpts[i].size();
  1865 +
  1866 + int pad = gutter + maxlen;
  1867 + helpwidth = width - pad;
  1868 +
  1869 + // All the following split-fu could be optimized by just using substring (offset, length) tuples, but just to get it done, we'll do some not-too expensive string copying.
  1870 + SplitDelim(groups[k]->help, '\n', desc);
  1871 + // Split lines longer than allowable help width.
  1872 + for(insertionIter=desc.begin(), cIter=insertionIter++;
  1873 + cIter != desc.end();
  1874 + cIter=insertionIter++) {
  1875 + if ((*cIter)->size() > helpwidth) {
  1876 + // Get pointer to next string to insert new strings before it.
  1877 + std::string *rem = *cIter;
  1878 + // Remove this line and add back in pieces.
  1879 + desc.erase(cIter);
  1880 + // Loop until remaining string is short enough.
  1881 + while (rem->size() > helpwidth) {
  1882 + // Find whitespace to split before helpwidth.
  1883 + if (rem->at(helpwidth) == ' ') {
  1884 + // If word ends exactly at helpwidth, then split after it.
  1885 + pos = helpwidth;
  1886 + } else {
  1887 + // Otherwise, split occurs midword, so find whitespace before this word.
  1888 + pos = rem->rfind(" ", helpwidth);
  1889 + }
  1890 + // Insert split string.
  1891 + desc.insert(insertionIter, new std::string(*rem, 0, pos));
  1892 + // Now skip any whitespace to start new line.
  1893 + pos = rem->find_first_not_of(' ', pos);
  1894 + rem->erase(0, pos);
  1895 + }
  1896 +
  1897 + if (rem->size())
  1898 + desc.insert(insertionIter, rem);
  1899 + else
  1900 + delete rem;
  1901 + }
  1902 + }
  1903 +
  1904 + usage.append(sortedOpts[i]);
  1905 + if (layout != INTERLEAVE)
  1906 + // Add whitespace between option names and description.
  1907 + usage.append(pad - sortedOpts[i].size(), ' ');
  1908 + else {
  1909 + usage.append("\n");
  1910 + usage.append(gutter, ' ');
  1911 + }
  1912 +
  1913 + // First line already padded above (before calling SplitDelim) after option flag names.
  1914 + cIter = desc.begin();
  1915 + usage.append(**cIter);
  1916 + usage.append("\n");
  1917 + // Now inject the pad for each line.
  1918 + for(++cIter; cIter != desc.end(); ++cIter) {
  1919 + usage.append(pad, ' ');
  1920 + usage.append(**cIter);
  1921 + usage.append("\n");
  1922 + }
  1923 +
  1924 + if (this->doublespace) usage.append("\n");
  1925 +
  1926 + if (desc.size()) {
  1927 + for(cIter=desc.begin(); cIter != desc.end(); ++cIter)
  1928 + delete *cIter;
  1929 +
  1930 + desc.clear();
  1931 + }
  1932 + }
  1933 +};
  1934 +/* ################################################################### */
  1935 +bool ezOptionParser::gotExpected(std::vector<std::string> & badOptions) {
  1936 + int i,j;
  1937 +
  1938 + for(i=0; i < groups.size(); ++i) {
  1939 + OptionGroup *g = groups[i];
  1940 + // If was set, ensure number of args is correct.
  1941 + if (g->isSet) {
  1942 + if ((g->expectArgs != 0) && g->args.empty()) {
  1943 + badOptions.push_back(*g->flags[0]);
  1944 + continue;
  1945 + }
  1946 +
  1947 + for(j=0; j < g->args.size(); ++j) {
  1948 + if ((g->expectArgs != -1) && (g->expectArgs != g->args[j]->size()))
  1949 + badOptions.push_back(*g->flags[0]);
  1950 + }
  1951 + }
  1952 + }
  1953 +
  1954 + return badOptions.empty();
  1955 +};
  1956 +/* ################################################################### */
  1957 +bool ezOptionParser::gotRequired(std::vector<std::string> & badOptions) {
  1958 + int i;
  1959 +
  1960 + for(i=0; i < groups.size(); ++i) {
  1961 + OptionGroup *g = groups[i];
  1962 + // Simple case when required but user never set it.
  1963 + if (g->isRequired && (!g->isSet)) {
  1964 + badOptions.push_back(*g->flags[0]);
  1965 + continue;
  1966 + }
  1967 + }
  1968 +
  1969 + return badOptions.empty();
  1970 +};
  1971 +/* ################################################################### */
  1972 +bool ezOptionParser::gotValid(std::vector<std::string> & badOptions, std::vector<std::string> & badArgs) {
  1973 + int groupid, validatorid;
  1974 + std::map< int, int >::iterator it;
  1975 +
  1976 + for(it = groupValidators.begin(); it != groupValidators.end(); ++it) {
  1977 + groupid = it->first;
  1978 + validatorid = it->second;
  1979 + if (validatorid < 0) continue;
  1980 +
  1981 + OptionGroup *g = groups[groupid];
  1982 + ezOptionValidator *v = validators[validatorid];
  1983 + bool nextgroup = false;
  1984 +
  1985 + for (int i = 0; i < g->args.size(); ++i) {
  1986 + if (nextgroup) break;
  1987 + std::vector< std::string* > * args = g->args[i];
  1988 + for (int j = 0; j < args->size(); ++j) {
  1989 + if (!v->isValid(args->at(j))) {
  1990 + badOptions.push_back(*g->flags[0]);
  1991 + badArgs.push_back(*args->at(j));
  1992 + nextgroup = true;
  1993 + break;
  1994 + }
  1995 + }
  1996 + }
  1997 + }
  1998 +
  1999 + return badOptions.empty();
  2000 +};
  2001 +/* ################################################################### */
  2002 +void ezOptionParser::parse(int argc, const char * argv[]) {
  2003 + if (argc < 1) return;
  2004 +
  2005 + /*
  2006 + std::map<std::string,int>::iterator it;
  2007 + for ( it=optionGroupIds.begin() ; it != optionGroupIds.end(); it++ )
  2008 + std::cout << (*it).first << " => " << (*it).second << std::endl;
  2009 + */
  2010 +
  2011 + int found=0, i, k, firstOptIndex=0, lastOptIndex=0;
  2012 + std::string s;
  2013 + OptionGroup *g;
  2014 +
  2015 + for(i=0; i < argc; ++i) {
  2016 + s = argv[i];
  2017 +
  2018 + if (optionGroupIds.count(s))
  2019 + break;
  2020 + }
  2021 +
  2022 + firstOptIndex = i;
  2023 +
  2024 + if (firstOptIndex == argc) {
  2025 + // No flags encountered, so set last args.
  2026 + this->firstArgs.push_back(new std::string(argv[0]));
  2027 +
  2028 + for(k=1; k < argc; ++k)
  2029 + this->lastArgs.push_back(new std::string(argv[k]));
  2030 +
  2031 + return;
  2032 + }
  2033 +
  2034 + // Store initial args before opts appear.
  2035 + for(k=0; k < i; ++k) {
  2036 + this->firstArgs.push_back(new std::string(argv[k]));
  2037 + }
  2038 +
  2039 + for(; i < argc; ++i) {
  2040 + s = argv[i];
  2041 +
  2042 + if (optionGroupIds.count(s)) {
  2043 + k = optionGroupIds[s];
  2044 + g = groups[k];
  2045 + g->isSet = 1;
  2046 + g->parseIndex.push_back(i);
  2047 +
  2048 + if (g->expectArgs) {
  2049 + // Read ahead to get args.
  2050 + ++i;
  2051 + if (i >= argc) return;
  2052 + g->args.push_back(new std::vector<std::string*>);
  2053 + SplitDelim(argv[i], g->delim, g->args.back());
  2054 + }
  2055 + lastOptIndex = i;
  2056 + }
  2057 + }
  2058 +
  2059 + // Scan for unknown opts/arguments.
  2060 + for(i=firstOptIndex; i <= lastOptIndex; ++i) {
  2061 + s = argv[i];
  2062 +
  2063 + if (optionGroupIds.count(s)) {
  2064 + k = optionGroupIds[s];
  2065 + g = groups[k];
  2066 + if (g->expectArgs) {
  2067 + // Read ahead for args and skip them.
  2068 + ++i;
  2069 + }
  2070 + } else {
  2071 + unknownArgs.push_back(new std::string(argv[i]));
  2072 + }
  2073 + }
  2074 +
  2075 + if ( lastOptIndex >= (argc-1) ) return;
  2076 +
  2077 + // Store final args without flags.
  2078 + for(k=lastOptIndex + 1; k < argc; ++k) {
  2079 + this->lastArgs.push_back(new std::string(argv[k]));
  2080 + }
  2081 +};
  2082 +/* ################################################################### */
  2083 +void ezOptionParser::prettyPrint(std::string & out) {
  2084 + char tmp[256];
  2085 + int i,j,k;
  2086 +
  2087 + out += "First Args:\n";
  2088 + for(i=0; i < firstArgs.size(); ++i) {
  2089 + sprintf(tmp, "%d: %s\n", i+1, firstArgs[i]->c_str());
  2090 + out += tmp;
  2091 + }
  2092 +
  2093 + // Sort the option flag names.
  2094 + int n = groups.size();
  2095 + std::vector<std::string* > stringPtrs(n);
  2096 + for(i=0; i < n; ++i) {
  2097 + stringPtrs[i] = groups[i]->flags[0];
  2098 + }
  2099 +
  2100 + // Sort first flag of each group with other groups.
  2101 + std::sort(stringPtrs.begin(), stringPtrs.end(), CmpOptStringPtr);
  2102 +
  2103 + out += "\nOptions:\n";
  2104 + OptionGroup *g;
  2105 + for(i=0; i < n; ++i) {
  2106 + g = get(stringPtrs[i]->c_str());
  2107 + out += "\n";
  2108 + // The flag names:
  2109 + for(j=0; j < g->flags.size()-1; ++j) {
  2110 + sprintf(tmp, "%s, ", g->flags[j]->c_str());
  2111 + out += tmp;
  2112 + }
  2113 + sprintf(tmp, "%s:\n", g->flags.back()->c_str());
  2114 + out += tmp;
  2115 +
  2116 + if (g->isSet) {
  2117 + if (g->expectArgs) {
  2118 + if (g->args.empty()) {
  2119 + sprintf(tmp, "%s (default)\n", g->defaults.c_str());
  2120 + out += tmp;
  2121 + } else {
  2122 + for(k=0; k < g->args.size(); ++k) {
  2123 + for(j=0; j < g->args[k]->size()-1; ++j) {
  2124 + sprintf(tmp, "%s%c", g->args[k]->at(j)->c_str(), g->delim);
  2125 + out += tmp;
  2126 + }
  2127 + sprintf(tmp, "%s\n", g->args[k]->back()->c_str(), g->delim);
  2128 + out += tmp;
  2129 + }
  2130 + }
  2131 + } else { // Set but no args expected.
  2132 + sprintf(tmp, "Set\n");
  2133 + out += tmp;
  2134 + }
  2135 + } else {
  2136 + sprintf(tmp, "Not set\n");
  2137 + out += tmp;
  2138 + }
  2139 + }
  2140 +
  2141 + out += "\nLast Args:\n";
  2142 + for(i=0; i < lastArgs.size(); ++i) {
  2143 + sprintf(tmp, "%d: %s\n", i+1, lastArgs[i]->c_str());
  2144 + out += tmp;
  2145 + }
  2146 +
  2147 + out += "\nUnknown Args:\n";
  2148 + for(i=0; i < unknownArgs.size(); ++i) {
  2149 + sprintf(tmp, "%d: %s\n", i+1, unknownArgs[i]->c_str());
  2150 + out += tmp;
  2151 + }
  2152 +};
  2153 +}
  2154 +/* ################################################################### */
  2155 +#endif /* EZ_OPTION_PARSER_H */
... ...
morfeusz/java/pl/waw/ipipan/morfeusz/MorfeuszProcessorType.java 0 → 100644
  1 +/* ----------------------------------------------------------------------------
  2 + * This file was automatically generated by SWIG (http://www.swig.org).
  3 + * Version 2.0.10
  4 + *
  5 + * Do not make changes to this file unless you know what you are doing--modify
  6 + * the SWIG interface file instead.
  7 + * ----------------------------------------------------------------------------- */
  8 +
  9 +package pl.waw.ipipan.morfeusz;
  10 +
/**
 * Processor kinds exposed through the SWIG binding. Each constant carries the
 * integer value assigned to it when the enum is initialised.
 */
public enum MorfeuszProcessorType {
  GENERATOR,
  ANALYZER;

  /** Returns the integer value assigned to this constant. */
  public final int swigValue() {
    return this.swigValue;
  }

  /**
   * Maps an integer value back to its constant.
   *
   * @param swigValue the integer value to look up
   * @return the constant whose value equals {@code swigValue}
   * @throws IllegalArgumentException if no constant has that value
   */
  public static MorfeuszProcessorType swigToEnum(int swigValue) {
    final MorfeuszProcessorType[] constants = MorfeuszProcessorType.class.getEnumConstants();

    // Fast path: the value is usually also the constant's position.
    final boolean inRange = swigValue >= 0 && swigValue < constants.length;
    if (inRange && constants[swigValue].swigValue == swigValue) {
      return constants[swigValue];
    }

    // Fallback: linear search over all constants.
    for (int idx = 0; idx < constants.length; idx++) {
      if (constants[idx].swigValue == swigValue) {
        return constants[idx];
      }
    }

    throw new IllegalArgumentException("No enum " + MorfeuszProcessorType.class + " with value " + swigValue);
  }

  /** Takes the next free value from the shared counter and advances it. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType() {
    final int assigned = SwigNext.next;
    SwigNext.next = assigned + 1;
    this.swigValue = assigned;
  }

  /** Uses an explicit value and restarts the shared counter just after it. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType(int swigValue) {
    this.swigValue = swigValue;
    SwigNext.next = swigValue + 1;
  }

  /** Copies another constant's value and restarts the shared counter just after it. */
  @SuppressWarnings("unused")
  private MorfeuszProcessorType(MorfeuszProcessorType swigEnum) {
    this.swigValue = swigEnum.swigValue;
    SwigNext.next = this.swigValue + 1;
  }

  private final int swigValue;

  // Holder for the running counter; a nested class so the enum constructors
  // can reach it while the constants are being initialised.
  private static class SwigNext {
    private static int next = 0;
  }
}
  52 +
... ...
morfeusz/morfeusz_analyzer.cpp
... ... @@ -8,18 +8,96 @@
8 8 #include <cstdlib>
9 9 #include <iostream>
10 10 #include <vector>
  11 +#include <map>
11 12 #include "fsa/fsa.hpp"
12 13 #include "Tagset.hpp"
13 14 #include "Morfeusz.hpp"
14 15 #include "const.hpp"
15 16  
  17 +#include "cli/cli.hpp"
  18 +
16 19 using namespace std;
  20 +using namespace ez;
  21 +
  22 +int main(int argc, const char** argv) {
  23 +
  24 + ezOptionParser opt;
  25 +
  26 + opt.overview = "Morfeusz analyzer";
  27 + opt.syntax = string(argv[0]) + " [OPTIONS]";
  28 + opt.example = string(argv[0]) + " --aggl strict --praet split --input /path/to/file.fsa";
  29 + // opt.footer = "Morfeusz Copyright (C) 2014\n";
  30 +
  31 + opt.add(
  32 + "", // Default.
  33 + 0, // Required?
  34 + 0, // Number of args expected.
  35 + 0, // Delimiter if expecting multiple args.
  36 + "Display usage instructions.", // Help description.
  37 + "-h", // Flag token.
  38 + "-help", // Flag token.
  39 + "--help", // Flag token.
  40 + "--usage" // Flag token.
  41 + );
  42 +
  43 + opt.add(
  44 + "", // Default.
  45 + 0, // Required?
  46 + 1, // Number of args expected.
  47 + 0, // Delimiter if expecting multiple args.
  48 + "file with analyzer finite state automaton and data, created with buildfsa.py script.", // Help description.
  49 + "-i", // Flag token.
  50 + "-input", // Flag token.
  51 + "--input" // Flag token.
  52 + );
  53 +
  54 + opt.add(
  55 + "", // Default.
  56 + 0, // Required?
  57 + 1, // Number of args expected.
  58 + 0, // Delimiter if expecting multiple args.
  59 + "aggl option.", // Help description.
  60 + "-a", // Flag token.
  61 + "-aggl", // Flag token.
  62 + "--aggl" // Flag token.
  63 + );
  64 +
  65 + opt.add(
  66 + "", // Default.
  67 + 0, // Required?
  68 + 1, // Number of args expected.
  69 + 0, // Delimiter if expecting multiple args.
  70 + "praet option.", // Help description.
  71 + "-p", // Flag token.
  72 + "-praet", // Flag token.
  73 + "--praet" // Flag token.
  74 + );
  75 +
  76 + opt.parse(argc, argv);
  77 +
  78 + if (opt.isSet("-h")) {
  79 + printCLIUsage(opt, cout);
  80 + return 0;
  81 + }
17 82  
18   -int main(int argc, char** argv) {
19 83 Morfeusz morfeusz;
20   - if (argc > 1) {
21   - morfeusz.setAnalyzerFile(argv[1]);
22   - printf("Using dictionary from %s\n", argv[1]);
  84 + if (opt.isSet("-i")) {
  85 + string analyzerFile;
  86 + opt.get("-i")->getString(analyzerFile);
  87 + morfeusz.setAnalyzerFile(analyzerFile);
  88 + printf("Using dictionary from %s\n", analyzerFile.c_str());
  89 + }
  90 + if (opt.isSet("-a")) {
  91 + string aggl;
  92 + opt.get("-a")->getString(aggl);
  93 + cerr << "setting aggl option to " << aggl << endl;
  94 + morfeusz.setAggl(aggl);
  95 + }
  96 + if (opt.isSet("-p")) {
  97 + string praet;
  98 + opt.get("-p")->getString(praet);
  99 + cerr << "setting praet option to " << praet << endl;
  100 + morfeusz.setPraet(praet);
23 101 }
24 102 #ifdef _WIN32
25 103 morfeusz.setCharset(CP852);
... ... @@ -37,10 +115,11 @@ int main(int argc, char** argv) {
37 115 printf("[");
38 116 for (unsigned int i = 0; i < res.size(); i++) {
39 117 MorphInterpretation& mi = res[i];
40   - if (prevStart != -1
  118 + if (prevStart != -1
41 119 && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) {
42 120 printf("]\n[");
43   - } else if (prevStart != -1) {
  121 + }
  122 + else if (prevStart != -1) {
44 123 printf("; ");
45 124 }
46 125 printf("%d,%d,%s,%s,%s,%s",
... ...
morfeusz/segrules/segrules.cpp
... ... @@ -28,8 +28,12 @@ static inline const unsigned char* getFSAsMapPtr(const unsigned char* ptr) {
28 28  
29 29 static inline SegrulesOptions deserializeOptions(const unsigned char*& ptr) {
30 30 SegrulesOptions res;
31   - res["aggl"] = deserializeString(ptr);
32   - res["praet"] = deserializeString(ptr);
  31 + unsigned char optsNum = *ptr;
  32 + ptr++;
  33 + for (unsigned char i = 0; i < optsNum; i++) {
  34 + string key = deserializeString(ptr);
  35 + res[key] = deserializeString(ptr);
  36 + }
33 37 return res;
34 38 }
35 39  
... ... @@ -54,3 +58,35 @@ map&lt;SegrulesOptions, SegrulesFSA*&gt; createSegrulesFSAsMap(const unsigned char* an
54 58 }
55 59 return res;
56 60 }
  61 +
  62 +SegrulesOptions getDefaultSegrulesOptions(const unsigned char* ptr) {
  63 + const unsigned char* fsasMapPtr = getFSAsMapPtr(ptr);
  64 + const unsigned char* currPtr = fsasMapPtr;
  65 + unsigned char fsasNum = *currPtr;
  66 + currPtr++;
  67 + for (unsigned char i = 0; i < fsasNum; i++) {
  68 + deserializeOptions(currPtr);
  69 + deserializeFSA(currPtr);
  70 + }
  71 + return deserializeOptions(currPtr);
  72 +}
  73 +
  74 +SegrulesFSA* getDefaultSegrulesFSA(
  75 + const map<SegrulesOptions, SegrulesFSA*>& map,
  76 + const unsigned char* ptr) {
  77 + SegrulesOptions opts = getDefaultSegrulesOptions(ptr);
  78 + return (*(map.find(opts))).second;
  79 +}
  80 +
  81 +void debugMap(const map<SegrulesOptions, SegrulesFSA*>& res) {
  82 + map<SegrulesOptions, SegrulesFSA*>::const_iterator it = res.begin();
  83 + while (it != res.end()) {
  84 + SegrulesOptions::const_iterator it1 = it->first.begin();
  85 + while (it1 != it->first.end()) {
  86 + cerr << it1->first << " --> " << it1->second << endl;
  87 + it1++;
  88 + }
  89 + cerr << it->second << endl;
  90 + it++;
  91 + }
  92 +}
... ...
morfeusz/segrules/segrules.hpp
... ... @@ -18,6 +18,9 @@ typedef std::map&lt;std::string, std::string&gt; SegrulesOptions;
18 18 //typedef FSA<unsigned char> SegrulesFSAType;
19 19  
20 20 std::map<SegrulesOptions, SegrulesFSA*> createSegrulesFSAsMap(const unsigned char* analyzerPtr);
  21 +SegrulesOptions getDefaultSegrulesOptions(const unsigned char* ptr);
  22 +SegrulesFSA* getDefaultSegrulesFSA(const map<SegrulesOptions, SegrulesFSA*>& map, const unsigned char* analyzerPtr);
  23 +void debugMap(const map<SegrulesOptions, SegrulesFSA*>& res);
21 24  
22 25 #endif /* SEGRULES_HPP */
23 26  
... ...
nbproject/configurations.xml
... ... @@ -279,7 +279,7 @@
279 279 <ccTool>
280 280 <incDir>
281 281 <pElem>morfeusz</pElem>
282   - <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem>
  282 + <pElem>/usr/lib/jvm/default-java/include</pElem>
283 283 </incDir>
284 284 <preprocessorList>
285 285 <Elem>libjmorfeusz_EXPORTS</Elem>
... ...