|
1
2
3
4
|
#include <iostream>
#include <cstdlib>
#include "cli.hpp"
|
|
5
|
#include "../const.hpp"
|
|
6
7
8
9
10
11
12
13
14
15
|
using namespace std;
using namespace ez;
/**
 * Writes the usage text produced by the option parser to an output stream.
 *
 * @param opt parser whose getUsage() fills in the usage text
 * @param out stream that receives the text
 */
static inline void printCLIUsage(ezOptionParser& opt, ostream& out) {
    string helpText;
    opt.getUsage(helpText);
    out << helpText;
}
|
|
16
|
/**
 * Builds the command-line option parser for the Morfeusz analyzer or
 * generator and parses the given arguments with it.
 *
 * The process is terminated from inside this function in two situations:
 *  - an argument that is not bound to any flag is present: a message is
 *    written to stderr and the process exits with status 1;
 *  - the help flag is set: the usage text is written to stdout and the
 *    process exits with status 0.
 *
 * @param argc          number of entries in argv
 * @param argv          argument vector; argv[0] is used as the program name
 *                      in the syntax and example texts
 * @param processorType ANALYZER selects the analyzer overview text and
 *                      enables the case-insensitive flag; any other value
 *                      selects the generator overview text
 * @return pointer to a parser allocated with new; nothing in this function
 *         releases it, so the caller is presumably expected to delete it
 */
ezOptionParser* getOptions(int argc, const char** argv, MorfeuszProcessorType processorType) {
    ezOptionParser* parser = new ezOptionParser();
    ezOptionParser& opt = *parser;

    opt.overview = processorType == ANALYZER
            ? "Morfeusz analyzer"
            : "Morfeusz generator";
    opt.syntax = string(argv[0]) + " [OPTIONS]";
    opt.example = string(argv[0]) + " --aggl strict --praet split --input /path/to/file.fsa";
    // opt.footer = "Morfeusz Copyright (C) 2014\n";

    opt.add(
            "", // Default.
            0, // Required?
            0, // Number of args expected.
            0, // Delimiter if expecting multiple args.
            "Display usage instructions.", // Help description.
            "-h", // Flag token.
            "-help", // Flag token.
            "--help", // Flag token.
            "--usage" // Flag token.
            );
    opt.add(
            "", // Default.
            0, // Required?
            1, // Number of args expected.
            0, // Delimiter if expecting multiple args.
            "file with analyzer finite state automaton and data, created with buildfsa.py script.", // Help description.
            "-i", // Flag token.
            "-input", // Flag token.
            "--input" // Flag token.
            );
    opt.add(
            "", // Default.
            0, // Required?
            1, // Number of args expected.
            0, // Delimiter if expecting multiple args.
            "aggl option.", // Help description.
            "-a", // Flag token.
            "-aggl", // Flag token.
            "--aggl" // Flag token.
            );
    opt.add(
            "", // Default.
            0, // Required?
            1, // Number of args expected.
            0, // Delimiter if expecting multiple args.
            "praet option.", // Help description.
            "-p", // Flag token.
            "-praet", // Flag token.
            "--praet" // Flag token.
            );
    opt.add(
            "", // Default.
            0, // Required?
            1, // Number of args expected.
            0, // Delimiter if expecting multiple args.
            "input/output charset", // Help description.
            "-c", // Flag token.
            "-charset", // Flag token.
            "--charset" // Flag token.
            );

    if (processorType == ANALYZER) {
        // Boolean switch: it is only ever tested with isSet(), so it must
        // not expect an argument (otherwise the parser would presumably
        // consume the token that follows it on the command line).
        opt.add(
                "", // Default.
                0, // Required?
                0, // Number of args expected.
                0, // Delimiter if expecting multiple args.
                "case insensitive - don't force matching uppercase with dictionary forms", // Help description.
                "-case-insensitive", // Flag token.
                "--case-insensitive" // Flag token.
                );
    }

    opt.add(
            "", // Default.
            0, // Required?
            0, // Number of args expected.
            0, // Delimiter if expecting multiple args.
            "show some debug information.", // Help description.
            "-d", // Flag token.
            "-debug", // Flag token.
            "--debug" // Flag token.
            );

    opt.parse(argc, argv);

    // More than one leading free argument is rejected; the first entry is
    // tolerated (presumably it is the program name - confirm against
    // ezOptionParser).
    if (opt.firstArgs.size() > 1) {
        cerr << "Invalid argument (not bound to any flag): " << *opt.firstArgs[1] << endl;
        exit(1);
    }
    // No trailing free arguments are accepted at all.
    if (!opt.lastArgs.empty()) {
        cerr << "Invalid argument (not bound to any flag): " << *opt.lastArgs[0] << endl;
        exit(1);
    }
    if (opt.isSet("-h")) {
        printCLIUsage(opt, cout);
        exit(0);
    }

    return parser;
}
|
|
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
/**
 * Translates an encoding name into the corresponding MorfeuszCharset value.
 *
 * Accepted names (exact, case-sensitive comparison): "UTF8", "ISO8859_2",
 * "CP1250" and "CP852".
 *
 * @param encodingStr encoding name to translate
 * @return the charset constant matching encodingStr
 * @throws const char* ("Invalid encoding") when the name is not recognized;
 *         a diagnostic listing the accepted names is written to stderr first
 */
static MorfeuszCharset getCharset(const string& encodingStr) {
    if (encodingStr == "UTF8")
        return UTF8;
    if (encodingStr == "ISO8859_2")
        return ISO8859_2;
    if (encodingStr == "CP1250")
        return CP1250;
    if (encodingStr == "CP852")
        return CP852;

    // Keep this list in sync with the names tested above.
    cerr << "Invalid encoding: '" << encodingStr << "'. Must be one of: UTF8, ISO8859_2, CP1250, CP852" << endl;
    throw "Invalid encoding";
}
|
|
141
|
/**
 * Applies the parsed command-line options to a Morfeusz instance.
 *
 * Each option is applied only when its flag is set; every applied setting
 * is reported on stderr so that diagnostics stay separate from stdout.
 *
 * On Windows builds (_WIN32 / _WIN64) the charset falls back to CP852 when
 * no charset flag was given; an explicit charset flag takes precedence.
 *
 * @param opt      parser that has already parsed the command line
 * @param morfeusz instance to configure
 * @throws const char* propagated from getCharset() when the charset name
 *         given with the charset flag is not recognized
 */
void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) {
    if (opt.isSet("-i")) {
        string analyzerFile;
        opt.get("-i")->getString(analyzerFile);
        morfeusz.setAnalyzerDictionary(analyzerFile);
        // Reported on stderr like every other setting below.
        cerr << "Using dictionary from " << analyzerFile << endl;
    }
    if (opt.isSet("-a")) {
        string aggl;
        opt.get("-a")->getString(aggl);
        cerr << "setting aggl option to " << aggl << endl;
        morfeusz.setAggl(aggl);
    }
    if (opt.isSet("-p")) {
        string praet;
        opt.get("-p")->getString(praet);
        cerr << "setting praet option to " << praet << endl;
        morfeusz.setPraet(praet);
    }
    if (opt.isSet("-d")) {
        cerr << "setting debug to TRUE" << endl;
        morfeusz.setDebug(true);
    }
    if (opt.isSet("-case-insensitive")) {
        cerr << "setting case sensitive to FALSE" << endl;
        morfeusz.setCaseSensitive(false);
    }
    if (opt.isSet("-c")) {
        string charset;
        opt.get("-c")->getString(charset);
        cerr << "setting charset to " << charset << endl;
        morfeusz.setCharset(getCharset(charset));
    }
#if defined(_WIN64) || defined(_WIN32)
    else {
        // Platform default only; it must not override an explicit choice.
        morfeusz.setCharset(CP852);
    }
#endif
}
|