Blame view

tools/maca/libmaca/conv/predicate.cpp 2.49 KB
Jan Lupa authored
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/*
    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
    Part of the libmaca project

    This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

    This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. 

    See the LICENSE.MACA, LICENSE.SFST, LICENSE.GUESSER, COPYING.LESSER and COPYING files for more details.
*/

#include <libmaca/conv/predicate.h>
#include <boost/foreach.hpp>
#include <libmaca/exception.h>

namespace Maca {
namespace Conversion {

/**
 * Build a predicate from a symbolic name looked up in the given tagset.
 *
 * The name is tried, in order, against tagset.get_value_mask(),
 * tagset.get_attribute_mask() and tagset.get_pos_mask():
 *  - value mask non-empty: `second` holds it and `first` holds the mask of
 *    the attribute reported by get_value_attribute() for that value;
 *  - otherwise, attribute mask non-empty: `first` holds it and `second`
 *    stays empty;
 *  - otherwise `first` stays empty and `second` holds the POS mask.
 *
 * @param name   string to resolve
 * @param tagset tagset used for all three lookups
 * @throws MacaError when all three lookups yield an empty mask
 */
TagPredicate::TagPredicate(const std::string& name, const Corpus2::Tagset& tagset)
{
	// First attempt: the name as a value.
	second = tagset.get_value_mask(name);
	if (second.any()) {
		first = tagset.get_attribute_mask(tagset.get_value_attribute(second));
		return;
	}

	// Second attempt: the name as an attribute (second remains empty).
	first = tagset.get_attribute_mask(name);
	if (!first.none()) {
		return;
	}

	// Last attempt: the name as a part of speech; nothing left to try after.
	second = tagset.get_pos_mask(name);
	if (second.none()) {
		throw MacaError("Predicate string invalid: '" + name +
				"' in tagset " + tagset.name());
	}
}

/**
 * Test a single tag against this predicate.
 *
 * When `first` has any bit set, the tag's values selected by `first`
 * (tag.get_values_for) must equal `second`; when `first` is empty, the
 * tag's POS (tag.get_pos) must equal `second`.
 *
 * @param tag tag to test
 * @return true when the relevant comparison holds
 */
bool TagPredicate::check(const Corpus2::Tag &tag) const
{
	return first.any()
		? (tag.get_values_for(first) == second)
		: (tag.get_pos() == second);
}

/**
 * Test whether every lexeme of a token carries a tag accepted by check().
 *
 * @param t token whose lexemes are examined
 * @return false as soon as one lexeme's tag fails the comparison,
 *         true otherwise (including when the loop body never runs)
 */
bool TagPredicate::token_match(const Corpus2::Token& t) const
{
	BOOST_FOREACH(const Corpus2::Lexeme& lexeme, t.lexemes()) {
		// check() performs the same masked-values / POS comparison
		if (!check(lexeme.tag())) {
			return false;
		}
	}
	return true;
}

/**
 * Modify a tag according to this predicate.
 *
 * With an empty `first` the tag's POS is set to `second`; otherwise
 * tag.add_values_masked(second, first) is called.
 *
 * @param tag tag modified in place
 */
void TagPredicate::apply(Corpus2::Tag &tag) const
{
	if (first.none()) {
		// No attribute mask stored: `second` is used as the POS.
		tag.set_pos(second);
		return;
	}
	tag.add_values_masked(second, first);
}

void apply_predicates(const std::vector<TagPredicate>& v, Corpus2::Token& t)
{
	BOOST_FOREACH(Corpus2::Lexeme& lex, t.lexemes()) {
		Corpus2::Tag newtag = lex.tag();
		BOOST_FOREACH(const TagPredicate& tp, v) {
			tp.apply(newtag);
		}
		lex.set_tag(newtag);
	}
}

/**
 * Default constructor: the underlying pair is default-constructed.
 */
PosOrthPredicate::PosOrthPredicate()
	: std::pair<Corpus2::mask_t, UnicodeString>()
{
}

/**
 * Construct a predicate from a POS mask and an orth string.
 *
 * @param pos  stored as the pair's `first`
 * @param orth stored as the pair's `second`
 */
PosOrthPredicate::PosOrthPredicate(Corpus2::mask_t pos,
		const UnicodeString &orth)
	: std::pair<Corpus2::mask_t, UnicodeString>(pos, orth) {}

/**
 * Test a token against the stored POS mask and orth.
 *
 * @param token token to test
 * @return the result of token.orth_pos_match(first, second)
 */
bool PosOrthPredicate::check(const Corpus2::Token &token) const
{
	const bool matched = token.orth_pos_match(first, second);
	return matched;
}

} /* end ns Conversion */
} /* end ns Maca */