morfeusz-nkjp-official-guesser.ini
806 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
; Morfeusz SGJP with output in the NKJP tagset, falling back on the Guesser from libcorpus1, whose output is naively converted to NKJP.
; NOTE: this config assumes the official version of Morfeusz SGJP is installed.
; Global settings for this analyser configuration.
[general]
; Tagset used for the output of the whole configuration.
tagset=nkjp
; Tokeniser configuration to use; with the nkjp config, dot and hyphen
; sequences are emitted as separate tokens.
toki-config=nkjp
; Constant analyser named "unknown": always yields the single tag `ign`
; in the nkjp tagset. It is the last entry in [default], so presumably it
; is the last resort for tokens no other analyser recognised.
[ma:unknown]
class=const
tagset=nkjp
tag=ign
; Constant analyser named "interp": always yields the single tag `interp`
; in the nkjp tagset. Selected by the [rule] with toki_type=p.
[ma:interp]
class=const
tagset=nkjp
tag=interp
; Constant analyser named "url": always yields the fixed tag below in the
; nkjp tagset. Selected by the [rule] with toki_type=tu.
[ma:url]
class=const
tagset=nkjp
; NOTE(review): reads as noun / singular / nominative / m3 gender in NKJP
; notation - confirm against the tagset definition.
tag=subst:sg:nom:m3
; Fallback analyser: uses the guesser module from corpuslib.
; The guesser is wrapped (class=wrap_convert): it works in the kipi tagset
; and its output is passed through a converter so that this analyser
; produces nkjp tags like the others.
[ma:guess]
class=wrap_convert
; the analyser being wrapped
wrapped_class=guesser
; conversion applied to the wrapped analyser's output (naive kipi -> nkjp)
wrapped_converter=naive-kipi2nkjp.conv
; tagset of the wrapped analyser, before conversion
wrapped_tagset=kipi
; tagset after conversion
tagset=nkjp
; Primary analyser: Morfeusz, listed first in [default].
[ma:morfeusz]
class=morfeusz
tagset=nkjp
; converter applied to Morfeusz output (SGJP -> nkjp, judging by the name)
converter=sgjp2nkjp.conv
; This config assumes the official Morfeusz SGJP build is installed.
; NOTE(review): presumably the analyser checks the library's reported
; version against this string - confirm what happens on a mismatch.
require_version=Morfeusz SGJP
; Routing rules: each [rule] section maps one tokeniser token type
; (toki_type) to the analyser section [ma:<name>] given by `ma`.
; NOTE(review): the repeated [rule] header looks intentional (one section
; per rule) - do not merge the sections without checking the parser.
; Tokens of type p (presumably punctuation) go to [ma:interp].
[rule]
toki_type=p
ma=interp
; Tokens of type tu (presumably URL-like tokens, judging by the target
; section name) go to [ma:url].
[rule]
toki_type=tu
ma=url
; Analysers for tokens not matched by any [rule] above.
; NOTE(review): the repeated `ma` key looks intentional - the entries
; appear to form an ordered fallback chain (Morfeusz first, then the
; guesser, then the constant `ign` analyser), in line with the file
; header's "fallback on Guesser". Do not collapse or reorder them
; without confirming how the parser treats duplicate keys.
[default]
ma=morfeusz
ma=guess
ma=unknown