|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libmaca project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.MACA, LICENSE.SFST, LICENSE.GUESSER, COPYING.LESSER and COPYING files for more details.
*/
#include <libmaca/conv/predicate.h>
#include <boost/foreach.hpp>
#include <libmaca/exception.h>
namespace Maca {
namespace Conversion {
/**
 * Build a predicate from a symbolic name resolved against a tagset.
 *
 * The name is tried in this order:
 *  1. as a value name: `second` holds the value mask and `first` holds the
 *     mask of the attribute reported by get_value_attribute() for it;
 *  2. as an attribute name: `first` holds the attribute mask and `second`
 *     keeps the (empty) result of the value lookup;
 *  3. as a part-of-speech name: `first` is empty and `second` holds the
 *     POS mask.
 *
 * @param name    value, attribute or POS name to resolve
 * @param tagset  tagset used for all lookups
 * @throws MacaError when none of the three lookups yields a non-empty mask
 */
TagPredicate::TagPredicate(const std::string& name, const Corpus2::Tagset& tagset)
{
	// Case 1: the name denotes an attribute value.
	second = tagset.get_value_mask(name);
	if (second.any()) {
		first = tagset.get_attribute_mask(tagset.get_value_attribute(second));
		return;
	}

	// Case 2: the name denotes a whole attribute.
	first = tagset.get_attribute_mask(name);
	if (!first.none()) {
		return;
	}

	// Case 3: the name denotes a part of speech.
	second = tagset.get_pos_mask(name);
	if (!second.none()) {
		return;
	}

	throw MacaError("Predicate string invalid: '" + name +
			"' in tagset " + tagset.name());
}
/**
 * Test a single tag against this predicate.
 *
 * With a non-empty attribute mask (`first`), the tag's values under that
 * mask must equal `second`; otherwise the tag's POS must equal `second`.
 *
 * @param tag  tag to examine
 * @return true when the tag satisfies the predicate
 */
bool TagPredicate::check(const Corpus2::Tag &tag) const
{
	const bool attribute_based = first.any();
	return attribute_based
		? (tag.get_values_for(first) == second)
		: (tag.get_pos() == second);
}
/**
 * Test whether every lexeme of a token satisfies this predicate.
 *
 * The comparison is the same as for a single tag: values under the
 * attribute mask `first` against `second` when `first` is non-empty,
 * the POS against `second` otherwise.
 *
 * @param t  token whose lexemes are examined
 * @return false as soon as one lexeme's tag mismatches, true otherwise
 *         (including when the token has no lexemes)
 */
bool TagPredicate::token_match(const Corpus2::Token& t) const
{
	// The mode depends only on the predicate, so decide it once up front.
	const bool attribute_based = first.any();
	BOOST_FOREACH(const Corpus2::Lexeme& lexeme, t.lexemes()) {
		const bool mismatch = attribute_based
			? (lexeme.tag().get_values_for(first) != second)
			: (lexeme.tag().get_pos() != second);
		if (mismatch) {
			return false;
		}
	}
	return true;
}
/**
 * Modify a tag so that it reflects this predicate.
 *
 * With a non-empty attribute mask (`first`), calls
 * add_values_masked(second, first) on the tag; otherwise sets the tag's
 * POS to `second`.
 *
 * @param tag  tag modified in place
 */
void TagPredicate::apply(Corpus2::Tag &tag) const
{
	if (!first.any()) {
		// No attribute mask: this predicate describes a part of speech.
		tag.set_pos(second);
		return;
	}
	tag.add_values_masked(second, first);
}
void apply_predicates(const std::vector<TagPredicate>& v, Corpus2::Token& t)
{
BOOST_FOREACH(Corpus2::Lexeme& lex, t.lexemes()) {
Corpus2::Tag newtag = lex.tag();
BOOST_FOREACH(const TagPredicate& tp, v) {
tp.apply(newtag);
}
lex.set_tag(newtag);
}
}
/**
 * Default constructor: the inherited pair members (`first`, `second`) are
 * left as produced by the default constructor of the std::pair base.
 */
PosOrthPredicate::PosOrthPredicate()
	: std::pair<Corpus2::mask_t, UnicodeString>()
{
}
/**
 * Construct a predicate from a POS mask and an orth.
 *
 * @param pos   stored as the pair's `first` member
 * @param orth  stored as the pair's `second` member
 */
PosOrthPredicate::PosOrthPredicate(Corpus2::mask_t pos, const UnicodeString &orth)
	: std::pair<Corpus2::mask_t, UnicodeString>(pos, orth)
{
	// Nothing else to do: all state lives in the std::pair base.
}
/**
 * Test a token against the stored POS mask and orth.
 *
 * @param token  token to examine
 * @return the result of token.orth_pos_match() called with the stored
 *         POS mask (`first`) and orth (`second`)
 */
bool PosOrthPredicate::check(const Corpus2::Token &token) const
{
	const bool matched = token.orth_pos_match(first, second);
	return matched;
}
} /* end ns Conversion */
} /* end ns Maca */
|