; morfeusz-kipi-guesser.ini
; Data from Morfeusz, converted to the simplified KIPI tagset (possible genders: n, m1, m2, m3, f)
; Global settings for this configuration.
[general]
; Tagset name; every analyser section in this file declares the same value.
	tagset=kipi
	; default config, but let's be explicit
	; NOTE(review): presumably selects the tokeniser ("toki") configuration by name -- confirm.
	toki-config=config

; Analyser "unknown": listed last in the toki_type=th rule and in [default].
; NOTE(review): class=const presumably assigns the fixed tag below to any token
; it receives, making this the catch-all for unrecognised forms -- confirm.
[ma:unknown]
	class=const
	tagset=kipi
	tag=ign

; Analyser "interp": the only analyser used by the toki_type=p rule.
; NOTE(review): presumably a constant analyser that tags punctuation tokens
; with `interp` -- confirm against the analyser documentation.
[ma:interp]
	class=const
	tagset=kipi
	tag=interp

; Analyser "patch": listed first in [default], ahead of morfeusz.
[ma:patch]
; a couple of Morfeusz corrections
	class=map
	tagset=kipi
; External data file with the corrections.
; NOTE(review): path resolution and file format are not visible here -- confirm.
	data=morfpatch-kipi.txt

; use guesser module from corpuslib
; Analyser "guess": listed in [default] after morfeusz and acro_stem and
; before unknown.
[ma:guess]
	class=guesser
	tagset=kipi

; Analyser "morfeusz": used by both the toki_type=th rule and [default].
[ma:morfeusz]
	class=morfeusz
	tagset=kipi
; Conversion definition file; per the file header, Morfeusz data is converted
; to the simplified KIPI tagset.
	converter=morfeusz2kipi.conv

; Analyser "acro_stem": used in [default] after morfeusz.
[ma:acro_stem]
; acronyms and proper names
	class=sfst
	tagset=kipi
; Transducer file used by this analyser.
	file=acro-uninfl.fst
; NOTE(review): presumably lower-cases the token before lookup -- confirm.
	lower-case=true

; Analyser "acro_suffix": used by the toki_type=th rule after morfeusz.
[ma:acro_suffix]
; acronyms/proper names with hyphens or apostrophes introducing inflectional suffixes
	class=sfst
	tagset=kipi
; Transducer file used by this analyser.
	file=acro-infl.fst
; NOTE(review): presumably lower-cases the token before lookup -- confirm.
	lower-case=true

; Rule for tokens whose tokeniser type is `p`: use only the "interp" analyser.
; NOTE(review): `p` presumably denotes punctuation -- confirm in the tokeniser config.
[rule]
	toki_type=p
	ma=interp

; Rule for tokens whose tokeniser type is `th`. Each ma= value names a [ma:*]
; section defined in this file.
; NOTE(review): `th` presumably marks tokens containing a hyphen or apostrophe,
; and the analysers are presumably tried in the listed order -- confirm. Do not
; reorder or merge the repeated ma= keys without checking.
[rule]
	toki_type=th
	ma=morfeusz
	ma=acro_suffix
	ma=unknown

; Analyser list for tokens not covered by a [rule] section. Each ma= value
; names a [ma:*] section defined in this file.
; NOTE(review): the analysers are presumably tried in the listed order, with
; "unknown" as the final fallback -- confirm. Do not reorder the repeated ma=
; keys without checking.
[default]
	ma=patch
	ma=morfeusz
	ma=acro_stem
	ma=guess
	ma=unknown