; morfeusz-ikipi-ext.ini
; Data from Morfeusz, converted to the intermediate tagset (ikipi)
; Top-level settings for this configuration.
[general]
	; tagset named at the top level; each [ma:*] section below repeats the same value
	tagset=ikipi
	; default config, but let's be explicit
	; NOTE(review): presumably selects the tokeniser (toki) configuration by name — confirm with the consumer
	toki-config=config

; Analyser "unknown": class const with the single tag `ign`.
; It is listed last in [default] and in the toki_type=th rule.
; NOTE(review): presumably the fallback that marks tokens no earlier analyser recognised — confirm.
[ma:unknown]
	class=const
	tagset=ikipi
	tag=ign

; Analyser "interp": class const with the single tag `interp`.
; It is the only analyser used by the toki_type=p rule.
; NOTE(review): presumably token type p is punctuation — confirm against the tokeniser config.
[ma:interp]
	class=const
	tagset=ikipi
	tag=interp

; Analyser "patch": class map, reading its entries from a data file.
; It is the first analyser listed in [default].
[ma:patch]
; The data file is named after kipi, but the patch doesn't violate ikipi,
; so no converter is configured here (compare [ma:morfeusz]).
	class=map
	tagset=ikipi
	data=morfpatch-kipi.txt

; Analyser "morfeusz": class morfeusz, the source of the Morfeusz data this
; file is built around; used by both the toki_type=th rule and [default].
[ma:morfeusz]
	class=morfeusz
	tagset=ikipi
	; conversion definition applied to this analyser
	; NOTE(review): judging by the file name, it maps Morfeusz/kipi tags to ikipi — confirm
	converter=morfkipi2ikipi.conv

; Analyser "acro_stem": class sfst, backed by the transducer file acro-uninfl.fst.
; Listed in [default] after morfeusz and before unknown.
[ma:acro_stem]
; acronyms and proper names; works for both kipi and ikipi
	class=sfst
	tagset=ikipi
	file=acro-uninfl.fst
	; NOTE(review): presumably lower-cases the token before transducer lookup — confirm
	lower-case=true

; Analyser "acro_suffix": class sfst, backed by the transducer file acro-infl.fst.
; Used only by the toki_type=th rule, between morfeusz and unknown.
[ma:acro_suffix]
; acronyms/proper names with hyphens or apostrophes introducing inflectional
; suffixes; works for both kipi and ikipi
	class=sfst
	tagset=ikipi
	file=acro-infl.fst
	; NOTE(review): presumably lower-cases the token before transducer lookup — confirm
	lower-case=true

; Rule for tokens of type p: handled by the const analyser "interp" alone.
; [rule] headers repeat throughout this file, one per rule; do not merge them.
[rule]
	toki_type=p
	ma=interp

; Rule for tokens of type th: morfeusz, then acro_suffix, then unknown.
; The repeated ma= keys are intentional (one analyser per line); keep their order.
; NOTE(review): presumably analysers are tried in the listed order and th marks
; tokens containing a hyphen/apostrophe (see [ma:acro_suffix]) — confirm.
[rule]
	toki_type=th
	ma=morfeusz
	ma=acro_suffix
	ma=unknown

; Default analyser chain: patch, morfeusz, acro_stem, unknown.
; The repeated ma= keys are intentional (one analyser per line); keep their order.
; NOTE(review): presumably applies to every token type not matched by a [rule] — confirm.
[default]
	ma=patch
	ma=morfeusz
	ma=acro_stem
	ma=unknown

; tagset extensions from TaKIPI
; Each rule below routes one or more token types to a single const analyser
; defined in the matching [ma:ext_*] section.
; type ts -> ext_sym
[rule]
	toki_type=ts
	ma=ext_sym
; type n -> ext_num_int
[rule]
	toki_type=n
	ma=ext_num_int
; type n_f -> ext_num_frac
[rule]
	toki_type=n_f
	ma=ext_num_frac
; type n_d -> ext_date
[rule]
	toki_type=n_d
	ma=ext_date
; type n_t -> ext_time
[rule]
	toki_type=n_t
	ma=ext_time
; type tm -> ext_mail
[rule]
	toki_type=tm
	ma=ext_mail
; types tu and n_ip (comma-separated list) share ext_url
[rule]
	toki_type=tu,n_ip
	ma=ext_url

; Const analysers referenced by the extension rules (ma=ext_*): each one
; carries a single fixed tag in the ikipi tagset.
; ext_sym: tag tsym
[ma:ext_sym]
	class=const
	tagset=ikipi
	tag=tsym
; ext_num_int: tag tnum with value integer
[ma:ext_num_int]
	class=const
	tagset=ikipi
	tag=tnum:integer
; ext_num_frac: tag tnum with value frac
[ma:ext_num_frac]
	class=const
	tagset=ikipi
	tag=tnum:frac
; ext_date: tag tdate
[ma:ext_date]
	class=const
	tagset=ikipi
	tag=tdate
; ext_time: tag ttime
[ma:ext_time]
	class=const
	tagset=ikipi
	tag=ttime
; ext_mail: tag tmail
[ma:ext_mail]
	class=const
	tagset=ikipi
	tag=tmail
; ext_url: note the tag is spelled turi, not turl
[ma:ext_url]
	class=const
	tagset=ikipi
	tag=turi