constanalyser.cpp
1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libmaca project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.MACA, LICENSE.SFST, LICENSE.GUESSER, COPYING.LESSER and COPYING files for more details.
*/
#include <libmaca/morph/constanalyser.h>
namespace Maca {
/* String identifier of this analyser type: "const".
   NOTE(review): presumably the key that register_analyser associates with
   this class -- confirm in MorphAnalyser. */
const char* ConstAnalyser::identifier = "const";
/* Holds the value returned by MorphAnalyser::register_analyser for this
   class. The initializer is a function call, so the registration runs as
   part of this translation unit's static initialisation. */
bool ConstAnalyser::registered =
MorphAnalyser::register_analyser<ConstAnalyser>();
/**
 * Build an analyser whose constant tag is given in string form.
 *
 * The string is converted to a tag with the tagset's parse_simple_tag();
 * lemma lowercasing is switched off.
 *
 * @param tagset tagset handed to the MorphAnalyser base and used to parse
 *               the tag; it is dereferenced here, so it must not be null
 * @param tag    textual form of the tag to assign
 */
ConstAnalyser::ConstAnalyser(const Corpus2::Tagset *tagset,
		const std::string &tag)
	: MorphAnalyser(tagset)
	, tag_(tagset->parse_simple_tag(tag))
	, lower_lemma_(false)
{
}
/**
 * Build an analyser from an already constructed tag.
 *
 * The tag is copied into the analyser as-is; lemma lowercasing is switched
 * off.
 *
 * @param tagset tagset handed to the MorphAnalyser base
 * @param tag    tag to assign
 */
ConstAnalyser::ConstAnalyser(const Corpus2::Tagset *tagset,
		const Corpus2::Tag &tag)
	: MorphAnalyser(tagset)
	, tag_(tag)
	, lower_lemma_(false)
{
}
/**
 * Build an analyser from a configuration node.
 *
 * Recognised keys:
 *  - "tag": required; its value is parsed with the tagset's
 *    parse_simple_tag() and becomes the constant tag,
 *  - "lower_lemma": optional flag, false when not given.
 *
 * @param cfg configuration node, also passed to the MorphAnalyser base
 * @throws ConfigValueMissing when "tag" is absent or empty
 */
ConstAnalyser::ConstAnalyser(const Config::Node& cfg)
	: MorphAnalyser(cfg)
	, tag_()
	, lower_lemma_(false)
{
	// An empty default makes a missing key look the same as an empty value.
	const std::string configured_tag = cfg.get("tag", "");
	if (configured_tag.empty()) {
		throw ConfigValueMissing("tag", "ConstAnalyser");
	}

	tag_ = tagset().parse_simple_tag(configured_tag);
	lower_lemma_ = cfg.get("lower_lemma", false);
}
/**
 * Make a copy of this analyser using the copy constructor.
 *
 * @return pointer to a copy allocated with new
 */
ConstAnalyser* ConstAnalyser::clone() const
{
	ConstAnalyser* duplicate = new ConstAnalyser(*this);
	return duplicate;
}
/**
 * Analyse a token by attaching one lexeme that carries the constant tag.
 *
 * A Corpus2 token is created from the Toki token, given a single lexeme
 * whose lemma is the token's orth (passed through toLower() when
 * lower_lemma_ is set), and handed to the sink.
 *
 * @param t    token to analyse
 * @param sink callback that receives the created token
 * @return always true
 */
bool ConstAnalyser::process_functional(const Toki::Token &t,
		boost::function<void (Corpus2::Token*)> sink)
{
	Corpus2::Token* analysed = create_from_toki(t);

	// Work on a copy of the orth so the input token is left untouched.
	UnicodeString base_form = t.orth();
	if (lower_lemma_) {
		base_form.toLower();
	}

	const Corpus2::Lexeme constant_lexeme(base_form, tag_);
	analysed->add_lexeme(constant_lexeme);

	sink(analysed);
	return true;
}
} /* end ns Maca */