Blame view

morfeusz/Environment.hpp 4.71 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
/* 
 * File:   Environment.hpp
 * Author: mlenart
 *
 * Created on 22 January 2014, 12:08
 */

#ifndef ENVIRONMENT_HPP
#define	ENVIRONMENT_HPP
Michał Lenart authored
11
#include <vector>
Michał Lenart authored
12
#include <set>
Michał Lenart authored
13
Michał Lenart authored
14
#include "case/CaseConverter.hpp"
Michał Lenart authored
15
#include "charset/CharsetConverter.hpp"
Michał Lenart authored
16
17
#include "fsa/fsa.hpp"
#include "segrules/segrules.hpp"
Michał Lenart authored
18
#include "const.hpp"
Michał Lenart authored
19
#include "IdResolverImpl.hpp"
Michał Lenart authored
20
#include "InterpsGroup.hpp"
Michał Lenart authored
21
22
#include "case/CasePatternHelper.hpp"
#include "deserialization/InterpsGroupsReader.hpp"
Michał Lenart authored
23
#include "Dictionary.hpp"
Michał Lenart authored
24
Michał Lenart authored
25
26
27
28
namespace morfeusz {

class InterpretedChunksDecoder;
class CasePatternHelper;
Michał Lenart authored
29
struct InterpsGroup;
Michał Lenart authored
30
typedef FSA<InterpsGroupsReader> FSAType;
Michał Lenart authored
31
Michał Lenart authored
32
33
34
35
36
/**
 * This class contains data required for morphological analysis/synthesis.
 * It contains references to dictionary automaton, charset converter, tagset data etc.
 * All of these can be changed by setters, changing Morfeusz behavior (different dictionary, charset, and other options).
 */
Michał Lenart authored
37
38
class Environment {
public:
Michał Lenart authored
39
40
41
42
43
44
45
    /**
     * Creates default environment with given initial charset, processor type (analyzer/generator) and default dictionary data ptr.
     * 
     * @param charset
     * @param morfeuszProcessor
     * @param fileStartPtr
     */
Michał Lenart authored
46
    Environment(const std::string& dictName, MorfeuszProcessorType morfeuszProcessor, bool usable);
Michał Lenart authored
47
Michał Lenart authored
48
49
50
51
52
    /**
     * Sets charset for this environment.
     * 
     * @param charset
     */
Michał Lenart authored
53
    void setCharset(Charset charset);
Michał Lenart authored
54
Michał Lenart authored
55
56
57
58
59
    /**
     * Sets case sensitivity options.
     * 
     * @param caseSensitive - if true, interpretations not matching case will be discarded.
     */
Michał Lenart authored
60
61
    void setCaseSensitive(bool caseSensitive);
Michał Lenart authored
62
63
64
65
66
67
    /**
     * Gets charset converter that is currently used by this environment.
     * Changed by setting charset.
     * 
     * @return - reference to charset converter.
     */
Michał Lenart authored
68
69
    const CharsetConverter& getCharsetConverter() const;
Michał Lenart authored
70
71
72
73
74
75
    /**
     * Returns case converter that is currently used by this environment.
     * Changed by setting case sensitivity option.
     * 
     * @return - reference to case converter.
     */
Michał Lenart authored
76
    const CaseConverter& getCaseConverter() const;
Michał Lenart authored
77
Michał Lenart authored
78
79
80
81
82
83
//    /**
//     * Sets new tagset for this environment.
//     * 
//     * @param tagset
//     */
//    void setTagset(IdResolverImpl& tagset);
Michał Lenart authored
84
85
86
87
88
89

    /**
     * Gets currently used tagset.
     * 
     * @return 
     */
Michał Lenart authored
90
    const IdResolverImpl& getIdResolver() const;
Michał Lenart authored
91
Michał Lenart authored
92
    /**
Michał Lenart authored
93
     * Sets dictionary by this environment.
Michał Lenart authored
94
     * 
Michał Lenart authored
95
     * @param dict - pointer to the dictionary
Michał Lenart authored
96
     */
Michał Lenart authored
97
    void setDictionary(const Dictionary* dict);
Michał Lenart authored
98
Michał Lenart authored
99
100
101
102
103
104
    /**
     * Sets segmentation rules option.
     * 
     * @param option
     * @param value
     */
Michał Lenart authored
105
106
    void setSegrulesOption(const std::string& option, const std::string& value);
Michał Lenart authored
107
    /**
Michał Lenart authored
108
109
110
111
112
113
114
115
     * Gets current segmentation rules option.
     * 
     * @param option
     * @param value
     */
    std::string getSegrulesOption(const std::string& option) const;

    /**
Michał Lenart authored
116
117
118
119
     * Gets segmentation rules automaton.
     * 
     * @return 
     */
Michał Lenart authored
120
121
    const SegrulesFSA& getCurrentSegrulesFSA() const;
Michał Lenart authored
122
123
124
125
126
    /**
     * Gets dictionary automaton.
     * 
     * @return 
     */
Michał Lenart authored
127
128
    const FSAType& getFSA() const;
Michał Lenart authored
129
130
131
132
    /**
     * Returns decoder that converts interpretations to external format.
     * @return 
     */
Michał Lenart authored
133
    const InterpretedChunksDecoder& getInterpretedChunksDecoder() const;
Michał Lenart authored
134
Michał Lenart authored
135
136
137
138
    /**
     * Gets processor type (info if this is analyzer or generator environment)
     * @return 
     */
Michał Lenart authored
139
140
    MorfeuszProcessorType getProcessorType() const;
Michał Lenart authored
141
142
143
144
145
    /**
     * Return current case pattern helper
     * 
     * @return 
     */
Michał Lenart authored
146
147
    const CasePatternHelper& getCasePatternHelper() const;
Michał Lenart authored
148
149
150
151
152
    /**
     * Returns true iff given codepoint denotes a separator char for ign handling.
     * @param codepoint
     * @return 
     */
Michał Lenart authored
153
154
    bool isSeparator(uint32_t codepoint) const;
Michał Lenart authored
155
156
157
158
159
160
161
162
163
164
    const std::set<std::string>& getAvailableAgglOptions() const;

    const std::set<std::string>& getAvailablePraetOptions() const;

    /**
     * Returns true iff this Environment has dictionary attached to it
     * @return 
     */
    bool isUsable() const;
Michał Lenart authored
165
166
    virtual ~Environment();
private:
Michał Lenart authored
167
    bool usable;
Michał Lenart authored
168
169
    const CharsetConverter* currentCharsetConverter;
    const CaseConverter caseConverter;
Michał Lenart authored
170
Michał Lenart authored
171
172
    const Dictionary* dictionary;
    IdResolverImpl idResolver;
Michał Lenart authored
173
Michał Lenart authored
174
    SegrulesOptions currSegrulesOptions;
Michał Lenart authored
175
176
177
    const SegrulesFSA* currSegrulesFSA;

    const InterpretedChunksDecoder* chunksDecoder;
Michał Lenart authored
178
    MorfeuszProcessorType processorType;
Michał Lenart authored
179
    CasePatternHelper* casePatternHelper;
Michał Lenart authored
180
Michał Lenart authored
181
    const CharsetConverter* getCharsetConverter(Charset charset) const;
Michał Lenart authored
182
183

    std::string getAvailableOptionsAsString(const std::string& option) const;
Michał Lenart authored
184
185
};
Michał Lenart authored
186
187
}
Michał Lenart authored
188
189
#endif	/* ENVIRONMENT_HPP */