; morfeusz-kipi-ext.ini
; Data from Morfeusz, converted to the simplified KIPI tagset (possible genders: n, m1, m2, m3, f)
; Global settings for this configuration.
[general]
	; same tagset name as the tagset= key of every [ma:*] section visible in this file
	tagset=kipi
	; default config, but let's be explicit
	; (presumably names the tokenizer configuration to load -- confirm against the consumer)
	toki-config=config

; Analyser "unknown": class const with the fixed tag ign.
; It is listed last in the toki_type=th rule and in [default], so it presumably
; serves as the fallback when no earlier analyser recognises a token -- confirm
; against the consumer's rule semantics.
[ma:unknown]
	class=const
	tagset=kipi
	tag=ign

; Analyser "interp": class const with the fixed tag interp.
; Referenced only by the toki_type=p rule in this file.
[ma:interp]
	class=const
	tagset=kipi
	tag=interp

[ma:patch]
; a couple of Morfeusz corrections
; Map-class analyser whose entries come from the data file named below.
; It is listed first in [default], ahead of the morfeusz analyser
; (presumably so its entries take precedence -- confirm).
	class=map
	tagset=kipi
	data=morfpatch-kipi.txt

[ma:morfeusz]
; Morfeusz-backed analyser. The converter file named below is presumably what
; turns Morfeusz output into the kipi tagset (cf. the header comment of this
; file) -- confirm against the converter's contents.
	class=morfeusz
	tagset=kipi
	converter=morfeusz2kipi.conv

[ma:acro_stem]
; acronyms and proper names
; sfst-class analyser reading acro-uninfl.fst; referenced by [default].
	class=sfst
	tagset=kipi
	file=acro-uninfl.fst
	; presumably lower-cases the token before lookup -- confirm
	lower-case=true

[ma:acro_suffix]
; acronyms/proper names with hyphens or apostrophes introducing inflectional suffixes
; sfst-class analyser reading acro-infl.fst; referenced by the toki_type=th rule.
	class=sfst
	tagset=kipi
	file=acro-infl.fst
	; presumably lower-cases the token before lookup -- confirm
	lower-case=true

; Tokens of type p go to the constant "interp" analyser only
; (p is presumably the tokenizer's punctuation type -- confirm).
[rule]
	toki_type=p
	ma=interp

; Tokens of type th: analysers listed as morfeusz, acro_suffix, unknown.
; Unlike [default], this rule lists neither patch nor acro_stem.
; NOTE(review): the order of the ma= lines is presumably significant -- do not
; reorder without checking the consumer.
[rule]
	toki_type=th
	ma=morfeusz
	ma=acro_suffix
	ma=unknown

; Analyser list presumably used for token types not matched by any [rule]
; section -- confirm against the consumer.
; NOTE(review): the order of the ma= lines is presumably significant -- do not
; reorder without checking the consumer.
[default]
	ma=patch
	ma=morfeusz
	ma=acro_stem
	ma=unknown

; Tagset extensions from TaKIPI: the rules below route special token types to the extra tags listed here
; tnum	tnt
; tsym
; tdate
; ttime
; turi
; tmail
; Each rule below sends one token type (or a comma-separated list of types) to
; a single constant ext_* analyser.
; ts -> ext_sym
[rule]
	toki_type=ts
	ma=ext_sym
; n -> ext_num_int
[rule]
	toki_type=n
	ma=ext_num_int
; n_f -> ext_num_frac
[rule]
	toki_type=n_f
	ma=ext_num_frac
; n_d -> ext_date
[rule]
	toki_type=n_d
	ma=ext_date
; n_t -> ext_time
[rule]
	toki_type=n_t
	ma=ext_time
; tm -> ext_mail
[rule]
	toki_type=tm
	ma=ext_mail
; tu and n_ip -> ext_url (two token types share one rule)
[rule]
	toki_type=tu,n_ip
	ma=ext_url
;

; Constant analysers for the extension tags; each has class const and one fixed
; tag in the kipi tagset. They are referenced by the toki_type rules for
; ts, n, n_f, n_d, n_t, tm and tu,n_ip.
; fixed tag: tsym
[ma:ext_sym]
	class=const
	tagset=kipi
	tag=tsym
; fixed tag: tnum:integer (same tnum prefix as ext_num_frac, different value after the colon)
[ma:ext_num_int]
	class=const
	tagset=kipi
	tag=tnum:integer
; fixed tag: tnum:frac
[ma:ext_num_frac]
	class=const
	tagset=kipi
	tag=tnum:frac
; fixed tag: tdate
[ma:ext_date]
	class=const
	tagset=kipi
	tag=tdate
; fixed tag: ttime
[ma:ext_time]
	class=const
	tagset=kipi
	tag=ttime
; fixed tag: tmail
[ma:ext_mail]
	class=const
	tagset=kipi
	tag=tmail
; fixed tag: turi
[ma:ext_url]
	class=const
	tagset=kipi
	tag=turi