; morfeusz-ikipi-ext.ini
; Data from Morfeusz, converted to the intermediate tagset (ikipi)
; Settings that apply to the configuration as a whole.
[general]
; Tagset identifier; every [ma:*] section visible in this file declares the same value.
tagset=ikipi
; default config, but let's be explicit
; (presumably selects the tokeniser configuration by name -- confirm with the consumer)
toki-config=config
; "unknown": class=const entry carrying the fixed tag "ign".
; It is listed as the last ma= entry of the toki_type=th rule and of [default].
[ma:unknown]
class=const
tagset=ikipi
tag=ign
; "interp": class=const entry carrying the fixed tag "interp".
; It is the only ma= entry of the toki_type=p rule.
[ma:interp]
class=const
tagset=ikipi
tag=interp
; "patch": class=map entry whose data comes from morfpatch-kipi.txt.
; It is the first ma= entry of [default].
[ma:patch]
; the patch doesn't violate ikipi
; (i.e. the kipi-named data file can be used with tagset=ikipi as it stands)
class=map
tagset=ikipi
data=morfpatch-kipi.txt
; "morfeusz": class=morfeusz entry, referenced by the toki_type=th rule and by [default].
[ma:morfeusz]
class=morfeusz
tagset=ikipi
; Conversion file applied to this entry's output; judging by its name it maps
; Morfeusz's kipi tags to ikipi -- confirm against the .conv file itself.
converter=morfkipi2ikipi.conv
; "acro_stem": class=sfst entry backed by acro-uninfl.fst; referenced by [default].
[ma:acro_stem]
; acronyms and proper names, work for both kipi and ikipi
class=sfst
tagset=ikipi
file=acro-uninfl.fst
; presumably lower-cases the input before lookup -- confirm with the consumer
lower-case=true
; "acro_suffix": class=sfst entry backed by acro-infl.fst; referenced by the
; toki_type=th rule.
[ma:acro_suffix]
; acronyms/proper names with hyphens or apostrophes introducing inflectional suffixes; work for both kipi and ikipi
class=sfst
tagset=ikipi
file=acro-infl.fst
; same setting as in [ma:acro_stem]
lower-case=true
; Dispatch rules. Each [rule] pairs a toki_type value with one or more ma= entries,
; and every ma= value names a [ma:<name>] section of this file.
; NOTE(review): [rule] sections and ma= keys are intentionally repeated. The
; consumer presumably collects them in file order, so do not merge, deduplicate
; or reorder them without checking the parser.
; Token type "p": handled by the constant "interp" entry only.
[rule]
toki_type=p
ma=interp
; Token type "th": morfeusz, then acro_suffix, then the constant "unknown" entry.
[rule]
toki_type=th
ma=morfeusz
ma=acro_suffix
ma=unknown
; Presumably the chain used for token types that no [rule] matches -- confirm.
; Listed order: patch, morfeusz, acro_stem, unknown.
[default]
ma=patch
ma=morfeusz
ma=acro_stem
ma=unknown
; tagset extensions from TaKIPI
; Extension rules: each token type below is routed to exactly one [ma:ext_*] entry.
; The toki_type names are defined outside this file; the glosses given here are
; inferred from the target entry names and should be confirmed.
; ts -> ext_sym (symbol, presumably)
[rule]
toki_type=ts
ma=ext_sym
; n -> ext_num_int (integer number, presumably)
[rule]
toki_type=n
ma=ext_num_int
; n_f -> ext_num_frac (fractional number, presumably)
[rule]
toki_type=n_f
ma=ext_num_frac
; n_d -> ext_date (date, presumably)
[rule]
toki_type=n_d
ma=ext_date
; n_t -> ext_time (time, presumably)
[rule]
toki_type=n_t
ma=ext_time
; tm -> ext_mail (e-mail address, presumably)
[rule]
toki_type=tm
ma=ext_mail
; tu and n_ip -> ext_url
; NOTE(review): this is the only rule with a comma-separated toki_type value;
; confirm that the consumer splits on "," rather than treating "tu,n_ip" as a
; single type name.
[rule]
toki_type=tu,n_ip
ma=ext_url
; Constant entries for the extension rules: each one is class=const with
; tagset=ikipi and a single fixed tag.
; Target of the toki_type=ts rule.
[ma:ext_sym]
class=const
tagset=ikipi
tag=tsym
; Target of the toki_type=n rule.
[ma:ext_num_int]
class=const
tagset=ikipi
tag=tnum:integer
; Target of the toki_type=n_f rule.
[ma:ext_num_frac]
class=const
tagset=ikipi
tag=tnum:frac
; Target of the toki_type=n_d rule.
[ma:ext_date]
class=const
tagset=ikipi
tag=tdate
; Target of the toki_type=n_t rule.
[ma:ext_time]
class=const
tagset=ikipi
tag=ttime
; Target of the toki_type=tm rule.
[ma:ext_mail]
class=const
tagset=ikipi
tag=tmail
; Target of the toki_type=tu,n_ip rule. The tag is spelled "turi" although the
; entry is named ext_url; keep the spelling as it stands unless the tagset
; definition says otherwise.
[ma:ext_url]
class=const
tagset=ikipi
tag=turi