|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libmaca project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.MACA, LICENSE.SFST, LICENSE.GUESSER, COPYING.LESSER and COPYING files for more details.
*/
#include <libmaca/conv/joinlayer.h>
#include <libcorpus2/tagsetmanager.h>
#include <boost/foreach.hpp>
namespace Maca {
namespace Conversion {
JoinLayer::JoinLayer(const Corpus2::Tagset& tagset)
: OneTagsetLayer(tagset), buf_(NULL)
{
}
JoinLayer::JoinLayer(const Config::Node& cfg)
: OneTagsetLayer(Corpus2::get_named_tagset(cfg.get<std::string>("tagset")))
, buf_(NULL)
{
append_rule(cfg);
}
JoinLayer::~JoinLayer()
{
delete buf_;
}
JoinLayer* JoinLayer::clone() const
{
JoinLayer* copy = new JoinLayer(tagset());
copy->rules_ = rules_;
if (buf_ != NULL) {
copy->buf_ = buf_->clone();
}
return copy;
}
void JoinLayer::append_rule(const JoinRule &rule)
{
Corpus2::require_matching_tagsets(rule, *this, "appending join rule");
rules_.push_back(rule);
}
void JoinLayer::append_rule(const Config::Node& cfg)
{
JoinRule jr(cfg);
append_rule(jr);
}
Corpus2::Token* JoinLayer::get_next_token()
{
if (buf_ == NULL) {
buf_ = source()->get_next_token();
}
Corpus2::Token* t = source()->get_next_token();
if (t != NULL && t->wa() == PwrNlp::Whitespace::None) {
BOOST_FOREACH(JoinRule& rule, rules_) {
Corpus2::Token* joined = rule.try_join(buf_, t);
if (joined != NULL) {
buf_ = joined;
t = source()->get_next_token();
if (t == NULL || t->wa() != PwrNlp::Whitespace::None) {
break;
}
}
}
}
std::swap(t, buf_);
return t;
}
} /* end ns Conversion */
} /* end ns Maca */
|