; Data from Morfeusz, converted to the simplified KIPI tagset
; (possible genders: n, m1, m2, m3, f)
;
; Layout of this file:
;   [general]  - global settings (tagset, tokenizer configuration name)
;   [ma:NAME]  - definition of an analyser called NAME
;   [rule]     - analysers listed for the given toki_type value(s)
;   [default]  - analysers listed without a toki_type
;                (presumably used when no [rule] matches - confirm)
; Repeated "ma=" keys inside one section are intentional; their order is
; kept exactly as in the original file (presumably the order in which the
; analysers are consulted - confirm against the analyser documentation).

[general]
tagset=kipi
; default config, but let's be explicit
toki-config=config

; --- analyser definitions -------------------------------------------------

; constant analyser producing the tag "ign"
[ma:unknown]
class=const
tagset=kipi
tag=ign

; constant analyser producing the tag "interp"
[ma:interp]
class=const
tagset=kipi
tag=interp

; a couple of Morfeusz corrections
[ma:patch]
class=map
tagset=kipi
data=morfpatch-kipi.txt

; Morfeusz itself, with output passed through the morfeusz2kipi.conv converter
[ma:morfeusz]
class=morfeusz
tagset=kipi
converter=morfeusz2kipi.conv

; acronyms and proper names
[ma:acro_stem]
class=sfst
tagset=kipi
file=acro-uninfl.fst
lower-case=true

; acronyms/proper names with hyphens or apostrophes introducing
; inflectional suffixes
[ma:acro_suffix]
class=sfst
tagset=kipi
file=acro-infl.fst
lower-case=true

; --- rules: which analysers are listed for which token type ---------------

[rule]
toki_type=p
ma=interp

[rule]
toki_type=th
ma=morfeusz
ma=acro_suffix
ma=unknown

[default]
ma=patch
ma=morfeusz
ma=acro_stem
ma=unknown

; here come the tagset extensions from TaKIPI
;   tnum tnt
;   tsym
;   tdate
;   ttime
;   turi
;   tmail

[rule]
toki_type=ts
ma=ext_sym

[rule]
toki_type=n
ma=ext_num_int

[rule]
toki_type=n_f
ma=ext_num_frac

[rule]
toki_type=n_d
ma=ext_date

[rule]
toki_type=n_t
ma=ext_time

[rule]
toki_type=tm
ma=ext_mail

; one rule covering two token types (comma-separated list)
[rule]
toki_type=tu,n_ip
ma=ext_url

;
; constant analysers referenced by the extension rules above; each one
; produces a single fixed tag

[ma:ext_sym]
class=const
tagset=kipi
tag=tsym

[ma:ext_num_int]
class=const
tagset=kipi
tag=tnum:integer

[ma:ext_num_frac]
class=const
tagset=kipi
tag=tnum:frac

[ma:ext_date]
class=const
tagset=kipi
tag=tdate

[ma:ext_time]
class=const
tagset=kipi
tag=ttime

[ma:ext_mail]
class=const
tagset=kipi
tag=tmail

[ma:ext_url]
class=const
tagset=kipi
tag=turi