Blame view

morfeusz/Environment.hpp 4.74 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
/* 
 * File:   Environment.hpp
 * Author: mlenart
 *
 * Created on 22 January 2014, 12:08
 */

#ifndef ENVIRONMENT_HPP
#define	ENVIRONMENT_HPP
Michał Lenart authored
11
#include <vector>
Michał Lenart authored
12
#include <set>
Michał Lenart authored
13
Michał Lenart authored
14
#include "case/CaseConverter.hpp"
Michał Lenart authored
15
#include "charset/CharsetConverter.hpp"
Michał Lenart authored
16
17
#include "fsa/fsa.hpp"
#include "segrules/segrules.hpp"
Michał Lenart authored
18
#include "const.hpp"
Michał Lenart authored
19
#include "IdResolverImpl.hpp"
Michał Lenart authored
20
#include "InterpsGroup.hpp"
Michał Lenart authored
21
22
#include "case/CasePatternHelper.hpp"
#include "deserialization/InterpsGroupsReader.hpp"
Michał Lenart authored
23
#include "Dictionary.hpp"
Michał Lenart authored
24
Michał Lenart authored
25
26
27
28
namespace morfeusz {

// Forward declarations: within this header both types are used only through
// pointers and references (see the accessors and private members of Environment),
// so complete definitions are not needed at this point.
class InterpretedChunksDecoder;
class CasePatternHelper;
Michał Lenart authored
29
// Forward declaration of InterpsGroup.
// NOTE(review): "InterpsGroup.hpp" is already included above and this header's
// Environment class and FSAType typedef do not mention the type — this declaration looks redundant; confirm.
struct InterpsGroup;
Michał Lenart authored
30
/**
 * Type of the dictionary automaton returned by Environment::getFSA():
 * the FSA template instantiated with InterpsGroupsReader.
 */
typedef FSA<InterpsGroupsReader> FSAType;
Michał Lenart authored
31
Michał Lenart authored
32
33
34
35
36
/**
 * This class contains data required for morphological analysis/synthesis.
 * It contains references to dictionary automaton, charset converter, tagset data etc.
 * All of these can be changed by setters, changing Morfeusz behavior (different dictionary, charset, and other options).
 */
Michał Lenart authored
37
38
class Environment {
public:
Michał Lenart authored
39
40
41
42
43
44
45
    /**
     * Creates default environment with given initial charset, processor type (analyzer/generator) and default dictionary data ptr.
     * 
     * @param charset
     * @param morfeuszProcessor
     * @param fileStartPtr
     */
Michał Lenart authored
46
    explicit Environment(MorfeuszProcessorType morfeuszProcessor, bool usable);
Michał Lenart authored
47
Michał Lenart authored
48
49
50
51
52
    /**
     * Sets charset for this environment.
     * 
     * @param charset
     */
Michał Lenart authored
53
    void setCharset(Charset charset);
Michał Lenart authored
54
Michał Lenart authored
55
56
57
58
59
    /**
     * Sets case sensitivity options.
     * 
     * @param caseSensitive - if true, interpretations not matching case will be discarded.
     */
Michał Lenart authored
60
61
    void setCaseSensitive(bool caseSensitive);
Michał Lenart authored
62
63
64
65
66
67
    /**
     * Gets charset converter that is currently used by this environment.
     * Changed by setting charset.
     * 
     * @return - reference to charset converter.
     */
Michał Lenart authored
68
69
    const CharsetConverter& getCharsetConverter() const;
Michał Lenart authored
70
71
72
73
74
75
    /**
     * Returns case converter that is currently used by this environment.
     * Changed by setting case sensitivity option.
     * 
     * @return - reference to case converter.
     */
Michał Lenart authored
76
    const CaseConverter& getCaseConverter() const;
Michał Lenart authored
77
Michał Lenart authored
78
79
80
81
82
83
//    /**
//     * Sets new tagset for this environment.
//     * 
//     * @param tagset
//     */
//    void setTagset(IdResolverImpl& tagset);
Michał Lenart authored
84
85
86
87
88
89

    /**
     * Gets currently used tagset.
     * 
     * @return 
     */
Michał Lenart authored
90
    const IdResolverImpl& getIdResolver() const;
Michał Lenart authored
91
Michał Lenart authored
92
    /**
Michał Lenart authored
93
     * Sets dictionary by this environment.
Michał Lenart authored
94
     * 
Michał Lenart authored
95
     * @param dict - pointer to the dictionary
Michał Lenart authored
96
     */
Michał Lenart authored
97
    void setDictionary(const Dictionary* dict);
Michał Lenart authored
98
Michał Lenart authored
99
100
101
102
103
104
    /**
     * Sets segmentation rules option.
     * 
     * @param option
     * @param value
     */
Michał Lenart authored
105
106
    void setSegrulesOption(const std::string& option, const std::string& value);
Michał Lenart authored
107
108
109
110
111
    /**
     * Gets segmentation rules automaton.
     * 
     * @return 
     */
Michał Lenart authored
112
113
    const SegrulesFSA& getCurrentSegrulesFSA() const;
Michał Lenart authored
114
115
116
117
118
    /**
     * Gets dictionary automaton.
     * 
     * @return 
     */
Michał Lenart authored
119
120
    const FSAType& getFSA() const;
Michał Lenart authored
121
122
123
124
    /**
     * Returns decoder that converts interpretations to external format.
     * @return 
     */
Michał Lenart authored
125
    const InterpretedChunksDecoder& getInterpretedChunksDecoder() const;
Michał Lenart authored
126
Michał Lenart authored
127
128
129
130
    /**
     * Gets processor type (info if this is analyzer or generator environment)
     * @return 
     */
Michał Lenart authored
131
132
    MorfeuszProcessorType getProcessorType() const;
Michał Lenart authored
133
134
135
136
137
    /**
     * Return current case pattern helper
     * 
     * @return 
     */
Michał Lenart authored
138
139
    const CasePatternHelper& getCasePatternHelper() const;
Michał Lenart authored
140
141
142
143
144
    /**
     * Returns true iff given codepoint denotes a separator char for ign handling.
     * @param codepoint
     * @return 
     */
Michał Lenart authored
145
146
    bool isSeparator(uint32_t codepoint) const;
Michał Lenart authored
147
148
149
150
151
152
153
154
155
156
    const std::set<std::string>& getAvailableAgglOptions() const;

    const std::set<std::string>& getAvailablePraetOptions() const;

    /**
     * Returns true iff this Environment has dictionary attached to it
     * @return 
     */
    bool isUsable() const;
Michał Lenart authored
157
158
    virtual ~Environment();
private:
Michał Lenart authored
159
    bool usable;
Michał Lenart authored
160
161
    const CharsetConverter* currentCharsetConverter;
    const CaseConverter caseConverter;
Michał Lenart authored
162
Michał Lenart authored
163
164
165
166
167
168
169
170
    const Dictionary* dictionary;
    IdResolverImpl idResolver;
//    IdResolverImpl tagset;
//    
//    const unsigned char* fsaFileStartPtr;
//    const FSAType* fsa;
//    std::vector<uint32_t> separatorsList;
//    std::map<SegrulesOptions, SegrulesFSA*> segrulesFSAsMap;
Michał Lenart authored
171
    SegrulesOptions currSegrulesOptions;
Michał Lenart authored
172
    const SegrulesFSA* currSegrulesFSA;
Michał Lenart authored
173
//    bool isFromFile;
Michał Lenart authored
174
175

    const InterpretedChunksDecoder* chunksDecoder;
Michał Lenart authored
176
    MorfeuszProcessorType processorType;
Michał Lenart authored
177
    CasePatternHelper* casePatternHelper;
Michał Lenart authored
178
Michał Lenart authored
179
    const CharsetConverter* getCharsetConverter(Charset charset) const;
Michał Lenart authored
180
181

    std::string getAvailableOptionsAsString(const std::string& option) const;
Michał Lenart authored
182
183
};
Michał Lenart authored
184
185
}
Michał Lenart authored
186
187
#endif	/* ENVIRONMENT_HPP */