morph_basic.cpp
3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libmaca project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.MACA, LICENSE.SFST, LICENSE.GUESSER, COPYING.LESSER and COPYING files for more details.
*/
#include <boost/test/unit_test.hpp>
#include <libmaca/morph/constanalyser.h>
#include <libmaca/morph/dispatchanalyser.h>
#include <libcorpus2/tagsetparser.h>
struct F {
F() : tagset(), t(UnicodeString::fromUTF8("aaa"), "t", PwrNlp::Whitespace::ManySpaces)
{
const char tagset_string[] = "[ATTR]\nA a1 a2 a3\nB b1 b2 b3\n[POS]\nign\n P1 A [B]\n P2 A\n";
try {
tagset.reset(new Corpus2::Tagset);
*tagset = Corpus2::Tagset::from_data(tagset_string);
} catch (Corpus2::TagsetParseError& e) {
std::cerr << e.info();
throw;
}
}
~F() {
}
boost::shared_ptr<Corpus2::Tagset> tagset;
Toki::Token t;
};
/**
 * A ConstAnalyser created with the tag "P1:a1" must turn the fixture token
 * into exactly one output token that keeps the input's orth and preceding
 * whitespace and carries a single lexeme: lemma equal to the input orth,
 * tag equal to "P1:a1".
 */
BOOST_FIXTURE_TEST_CASE( morph_const, F )
{
    Maca::ConstAnalyser a(tagset.get(), "P1:a1");
    const std::vector<Corpus2::Token*> tv = a.process(t);
    // Hand every returned token to a smart pointer before asserting anything:
    // a failing BOOST_REQUIRE_* aborts the test case, so a trailing `delete`
    // would be skipped and the token leaked.
    const std::vector<boost::shared_ptr<Corpus2::Token> > owned(tv.begin(), tv.end());
    BOOST_REQUIRE_EQUAL(owned.size(), 1u);
    Corpus2::Token* tt = owned[0].get();
    BOOST_CHECK_EQUAL(tt->orth_utf8(), t.orth_utf8());
    BOOST_CHECK_EQUAL(tt->wa(), t.preceeding_whitespace());
    BOOST_REQUIRE_EQUAL(tt->lexemes().size(), 1u);
    const Corpus2::Lexeme& lex = tt->lexemes()[0];
    BOOST_CHECK(lex.lemma() == t.orth());
    BOOST_CHECK_EQUAL(tagset->tag_to_string(lex.tag()), "P1:a1");
}
struct Fd : public F
{
Fd() : F(), a(tagset.get()) {
tag1s = "P1:a2:b1";
tag2s = "P2:a1";
Maca::ConstAnalyser* ca1 = new Maca::ConstAnalyser(tagset.get(), tag1s);
Maca::ConstAnalyser* ca2 = new Maca::ConstAnalyser(tagset.get(), tag2s);
a.add_type_handler("t", ca1);
a.add_type_handler("a", ca2);
a.add_type_handler("b", ca2);
}
~Fd() {
}
Maca::DispatchAnalyser a;
std::string tag1s;
std::string tag2s;
};
/**
 * The unmodified fixture token (built with "t") must be handled by the
 * handler registered for "t": one output token with one lexeme whose lemma
 * equals the input orth and whose tag renders as tag1s.
 */
BOOST_FIXTURE_TEST_CASE( morph_dispatch1, Fd )
{
    const std::vector<Corpus2::Token*> tv = a.process(t);
    // Own the returned tokens up front: a failing BOOST_REQUIRE_* aborts the
    // test case, which would skip a trailing `delete` and leak the token.
    const std::vector<boost::shared_ptr<Corpus2::Token> > owned(tv.begin(), tv.end());
    BOOST_REQUIRE_EQUAL(owned.size(), 1u);
    Corpus2::Token* tt = owned[0].get();
    BOOST_REQUIRE_EQUAL(tt->lexemes().size(), 1u);
    const Corpus2::Lexeme& lex = tt->lexemes()[0];
    BOOST_CHECK(lex.lemma() == t.orth());
    BOOST_CHECK_EQUAL(tagset->tag_to_string(lex.tag()), tag1s);
}
/**
 * After switching the token type to "a", the handler registered for "a"
 * must be used: one output token with one lexeme whose lemma equals the
 * input orth and whose tag renders as tag2s.
 */
BOOST_FIXTURE_TEST_CASE( morph_dispatch2, Fd )
{
    t.set_type("a");
    const std::vector<Corpus2::Token*> tv = a.process(t);
    // Own the returned tokens up front: a failing BOOST_REQUIRE_* aborts the
    // test case, which would skip a trailing `delete` and leak the token.
    const std::vector<boost::shared_ptr<Corpus2::Token> > owned(tv.begin(), tv.end());
    BOOST_REQUIRE_EQUAL(owned.size(), 1u);
    Corpus2::Token* tt = owned[0].get();
    BOOST_REQUIRE_EQUAL(tt->lexemes().size(), 1u);
    const Corpus2::Lexeme& lex2 = tt->lexemes()[0];
    BOOST_CHECK(lex2.lemma() == t.orth());
    BOOST_CHECK_EQUAL(tagset->tag_to_string(lex2.tag()), tag2s);
}
/**
 * After switching the token type to "b", the handler registered for "b"
 * (the same instance as for "a") must be used: one output token with one
 * lexeme whose lemma equals the input orth and whose tag renders as tag2s.
 */
BOOST_FIXTURE_TEST_CASE( morph_dispatch3, Fd )
{
    t.set_type("b");
    const std::vector<Corpus2::Token*> tv = a.process(t);
    // Own the returned tokens up front: a failing BOOST_REQUIRE_* aborts the
    // test case, which would skip a trailing `delete` and leak the token.
    const std::vector<boost::shared_ptr<Corpus2::Token> > owned(tv.begin(), tv.end());
    BOOST_REQUIRE_EQUAL(owned.size(), 1u);
    Corpus2::Token* tt = owned[0].get();
    BOOST_REQUIRE_EQUAL(tt->lexemes().size(), 1u);
    const Corpus2::Lexeme& lex = tt->lexemes()[0];
    BOOST_CHECK(lex.lemma() == t.orth());
    BOOST_CHECK_EQUAL(tagset->tag_to_string(lex.tag()), tag2s);
}
/**
 * A token whose type has no registered handler ("ZZZ" is never passed to
 * add_type_handler in Fd) must make process() throw Maca::MacaError.
 */
BOOST_FIXTURE_TEST_CASE( morph_dispatch4, Fd )
{
    const std::string unhandled_type("ZZZ");
    t.set_type(unhandled_type);
    BOOST_CHECK_THROW(a.process(t), Maca::MacaError);
}