Blame view

tools/maca/data/morfeusz-nkjp-guesser.ini 1.21 KB
Jan Lupa authored
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
; Morfeusz SGJP with output in the NKJP tagset, falling back on the Guesser from libcorpus1,
; whose output is naively converted to NKJP.
; NOTE: this config assumes that a Morfeusz SGJP build with a tweaked soname is installed.
; That build is used internally by the MACA authors so that Morfeusz SIaT and Morfeusz SGJP
; can coexist peacefully (which is normally impossible).
; If you want to use the official SGJP version (and do not use SIaT), please use the
; morfeusz-nkjp-official-guesser config instead.

; Settings that apply to the configuration as a whole.
[general]
	; tagset of the final output (the file header describes this config as outputting NKJP)
	tagset=nkjp
	; tokeniser configuration to use: dot and hyphen sequences as separate tokens
	; NOTE(review): presumably the name of a toki config, resolved by the consuming tool - confirm
	toki-config=nkjp

; Analyser "unknown": class `const` with the single fixed tag `ign`.
; It is the last `ma` entry in [default].
; NOTE(review): presumably `const` assigns its tag to every token it is given - confirm.
[ma:unknown]
	class=const
	tagset=nkjp
	tag=ign

; Analyser "interp": class `const` with the single fixed tag `interp`.
; Selected by the [rule] whose toki_type is `p`.
[ma:interp]
	class=const
	tagset=nkjp
	tag=interp

; Analyser "url": class `const` with the single fixed tag `subst:sg:nom:m3`.
; Selected by the [rule] whose toki_type is `tu`.
[ma:url]
	class=const
	tagset=nkjp
	; NOTE(review): reads as noun, singular, nominative, gender m3 in NKJP notation - verify against the tagset definition
	tag=subst:sg:nom:m3

; Analyser "guess": uses the guesser module from corpuslib, wrapped so that its
; output ends up in the NKJP tagset.
[ma:guess]
	; wrapper class: runs the wrapped analyser and passes its output through a converter
	; NOTE(review): behaviour inferred from the key names below - confirm against MACA docs
	class=wrap_convert
	; the analyser being wrapped
	wrapped_class=guesser
	; conversion applied to the wrapped analyser's output (the "naive" KIPI-to-NKJP conversion named in the file header)
	wrapped_converter=naive-kipi2nkjp.conv
	; tagset of the wrapped analyser, before conversion
	wrapped_tagset=kipi
	; tagset after conversion
	tagset=nkjp

; Analyser "morfeusz": the first `ma` entry in [default]. Uses Morfeusz SGJP and
; converts its output to the NKJP tagset.
[ma:morfeusz]
	class=morfeusz
	tagset=nkjp
	; conversion applied to the Morfeusz output
	converter=sgjp2nkjp.conv
	; Shared library with the tweaked soname. Comment out the `library` line below if
	; Morfeusz SGJP is installed normally, i.e. not alongside Morfeusz SIaT as a
	; binary-modified build with the soname changed.
	library=libmorfSGJP.so.0
	; NOTE(review): presumably compared against the version string reported by the loaded library - confirm
	require_version=Morfeusz SGJP
; Rule: tokens with toki type `p` are handled by the "interp" analyser ([ma:interp]).
; NOTE(review): `p` presumably denotes punctuation tokens - confirm against the toki config.
[rule]
	toki_type=p
	ma=interp

; Rule: tokens with toki type `tu` are handled by the "url" analyser ([ma:url]).
; NOTE(review): `tu` presumably denotes URL tokens - confirm against the toki config.
[rule]
	toki_type=tu
	ma=url

; Analysers for tokens that no [rule] section selects an analyser for.
; NOTE(review): inferred from the section name - confirm.
; The repeated `ma` keys are deliberate (one analyser per line); do not merge or
; reorder them. The file header describes the Guesser as a fallback for Morfeusz,
; so the listed order is presumably the order in which the analysers are tried.
[default]
	ma=morfeusz
	ma=guess
	ma=unknown