ENIAMwalTypes.ml
9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
(*
* ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open Xstd
(** Lexical tokens.  [Text] carries a raw string; [Paren], [Bracet] and
    [SqBra] each wrap a nested token list; the remaining constant
    constructors name individual bracket and punctuation symbols.
    NOTE(review): presumably the constant bracket constructors are emitted
    first and later folded into the nested forms by a bracket-matching
    pass — confirm against the parser that consumes this type. *)
type token =
Text of string
| Paren of token list
| Bracet of token list
| SqBra of token list
| LParen | RParen | LBracet | RBracet | LSqBra | RSqBra
| Semic | Plus | Comma | Quot
(** Opinion label, used by the [opinion] fields of [schema] and [example]
    and by [DefaultAtrs].  Constructor names are Polish; [OpinionUndef]
    stands for an unspecified value.
    NOTE(review): presumably good / certain / colloquial / doubtful /
    archaic / bad / vulgar / default, in declaration order — confirm
    against the Walenty documentation. *)
type opinion = Dobry | Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Domyslny | OpinionUndef
(** Negation attribute; see [negativity] in [schema] and the [negation]
    components of [pos].
    NOTE(review): [Aff] presumably means "affirmative" and [NegationNA]
    "not applicable" — confirm. *)
type negation = Negation | Aff | NegationUndef | NegationNA
(** Predicativity attribute; see [predicativity] in [schema]. *)
type pred = PredTrue | PredFalse | PredUndef | PredNA
(** Aspect attribute: either an explicit value carried as a string, or one
    of the two placeholder constructors. *)
type aspect = Aspect of string | AspectUndef | AspectNA
(** Grammatical case.  [Case] carries an explicit value as a string; all
    other constructors are symbolic.
    NOTE(review): [Str] / [Part] presumably stand for structural and
    partitive case and the [*Agr] constructors for agreement-driven
    case — confirm. *)
type case = Case of string | Str | Part | CaseAgr | NomAgr | GenAgr | AllAgr | CaseUndef | AllUAgr | CaseUAgr
(** Complementizer: an explicit one carried as a string, two dedicated
    constructors ([Zeby], [Gdy]), or [CompUndef]. *)
type comp = Comp of string | Zeby | Gdy | CompUndef
(** Clause type paired with a complementizer in [CP], [NCP], [PrepNCP] and
    [PhraseComp]; also the key type of [CompMap].  The [CompTypeAgr]
    constructor is disabled.
    NOTE(review): [Int] / [Rel] presumably mean interrogative / relative —
    confirm. *)
type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*)
(** Grammatical number: explicit value, unspecified, or agreement. *)
type number = Number of string | NumberUndef | NumberAgr
(** Grammatical gender: a single explicit value, unspecified, agreement, or
    a list of explicit values ([Genders]). *)
type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list
(** Degree: explicit value carried as a string, or unspecified. *)
type grad = Grad of string | GradUndef
(** Reflexivity marker; see [reflexiveMark] in [schema] and [lex_reflex] in
    [lex_record]. *)
type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef
(** Value of the [acm] component of [NUM] and [Num]: explicit string or
    unspecified.
    NOTE(review): presumably the numeral accommodability category —
    confirm. *)
type acm = Acm of string | AcmUndef
(** Two-valued marker.  NOTE(review): presumably "semantic" vs
    "non-semantic" — confirm at the use sites, none are visible here. *)
type sem = Sem | NoSem
(* type req = Req | NReq | ReqUndef *)
(** Mood, kept as a plain string; the variant wrapper ([Mood] / [MoodUndef])
    is disabled. *)
type mood = (*Mood of*) string (*| MoodUndef*)
(** Tense, kept as a plain string. *)
type tense = string
(** Auxiliary marker, a component of [PersAtrs].
    NOTE(review): presumably no / past / future / imperative auxiliary —
    confirm. *)
type aux = NoAux | PastAux | FutAux | ImpAux
(** Noun marker, a component of [NounAtrs]: [Common] with a string payload,
    or [Time]. *)
type nsem = Common of string | Time
(** Grammatical function stored in the [gf] field of [position]. *)
type gf = SUBJ | OBJ | ARG
(** Part-of-speech descriptor.  Each constructor carries the grammatical
    categories tracked for that class; see the component types for their
    values.
    NOTE(review): the constructor names look like NKJP-style tags (subst,
    prep, num, adj, adv, ger, pact, ppas, inf, qub, ...) — confirm. *)
type pos =
SUBST of number * case
| PREP of case
| NUM of case * gender * acm
| ADJ of number * case * gender * grad
| ADV of grad
| GER of number * case * gender * aspect * negation * refl
| PACT of number * case * gender * aspect * negation * refl
| PPAS of number * case * gender * aspect * negation
| INF of aspect * negation * refl
| QUB
| COMPAR
| COMP of comp_type
(* PERS currently keeps only negation and refl; the remaining components
   are disabled inside the inline comment below. *)
| PERS of (*number * gender * aspect * person * *)negation * refl
(** Phrase type.  Many constructors come in pairs: a bare form taking a
    [case] (e.g. [NP], [AdjP], [NumP], [GerP], [PpasP]) and a "Prep" form
    taking an additional [string] (e.g. [PrepNP], [PrepAdjP]).
    NOTE(review): the string component presumably holds the preposition
    (or, for [ComparNP] / [ComparPP], the comparative word) — confirm. *)
type phrase =
NP of case
| PrepNP of string * case
| AdjP of case
| PrepAdjP of string * case
| NumP of case
| PrepNumP of string * case
| ComprepNP of string
| ComparNP of string * case
| ComparPP of string
(* Clause-like phrases: parameterised by clause type and complementizer. *)
| CP of comp_type * comp
| NCP of case * comp_type * comp
| PrepNCP of string * case * comp_type * comp
| InfP of aspect
| AdvP
| FixedP of string
| Num of case * acm
(* Constant constructors without parameters. *)
| Or
| Refl
| Recip
| Qub
| Pro
| ProNG
| Null
| GerP of case
| PrepGerP of string * case
| PpasP of case
| PrepPpasP of string * case
| PactP of case
(** Abbreviated phrase categories.  Used as the head of [PhraseAbbr] in
    [morf] and as the key type of [AbbrMap].
    NOTE(review): presumably expanded into concrete phrases using the file
    named by [realizations_filename] — confirm against the loader. *)
type phrase_abbr =
Xp of string
| Advp of string
| ComparP of string
| Nonch
| Distrp
| Possp
(** Clause-phrase heads; [PhraseComp] in [morf] pairs one of these with a
    clause type and a list of complementizers.  The constructors mirror
    [CP], [NCP] and [PrepNCP] of [phrase] without the complementizer
    components. *)
type phrase_comp =
Cp
| Ncp of case
| Prepncp of string * case
(** Lexical item specification: a single [Lexeme], one of three list
    combinators ([ORconcat], [ORcoord], [XOR]) over nested specifications,
    or [Elexeme] carrying a gender.
    NOTE(review): the exact difference between the three combinators is not
    visible in this file — confirm against the Walenty documentation. *)
type lex =
Lexeme of string
| ORconcat of lex list
| ORcoord of lex list
| XOR of lex list
| Elexeme of gender
(** Restriction marker paired with a position list in [LexPhrase],
    [LexPhraseMode], [lex_modification], [LexFrame] and [ComprepFrame].
    NOTE(review): the names mirror Walenty's modification keywords (natr,
    ratr, atr, ...) — confirm their semantics. *)
type restr = Natr | Ratr | Ratrs | Ratr1 | Atr | Atr1 | NoRestr
(** Direction; stored in the [dir] field of [position] and carried by
    [Raised] in [morf]. *)
type direction = Forward | Backward | Both
(*type lex_specs =
NSpecs of number
| AdvSpecs of grad
| AdjSpecs of number * gender * grad
| PpasSpecs of number * gender * negation
| PactSpecs of number * gender * negation * refl
| GerSpecs of number * negation * refl
| CSpecs of negation * refl
| NumSpecs of gender
| EmptySpecs *)
(** Identifier record: a [hash] flag, a [suffix] string and a list of
    [numbers] kept as strings. *)
type id = {
  hash : bool;
  suffix : string;
  numbers : string list;
}

(** Identifier with every field at its neutral value: flag cleared, empty
    suffix, no numbers. *)
let empty_id : id = {
  hash = false;
  suffix = "";
  numbers = [];
}
(** Position record.
    - [psn_id]: identifier of the position
    - [gf]: grammatical function
    - [role], [role_attr]: role and role attribute, as strings
    - [sel_prefs]: string list (NOTE(review): presumably selectional
      preferences, judging by the name — confirm)
    - [cr], [ce]: string lists (NOTE(review): presumably controller /
      controllee labels — confirm)
    - [dir]: direction
    - [morfs]: the realizations, each tagged with an [id]
    Declared together with [morf] and [lex_record] because lexicalized
    morfs embed position lists. *)
type position = {psn_id: id; gf: gf; role: string; role_attr: string; sel_prefs: string list;
cr: string list; ce: string list; dir: direction; morfs: (id * morf) list}
(** One realization of a position. *)
and morf =
(* Plain phrase, and a second wrapper [E] around the same payload. *)
Phrase of phrase
| E of phrase
(* Lexicalized phrases: a list of (pos, lex) pairs plus a restriction and
   nested positions; the "Mode" variant adds a leading string. *)
| LexPhrase of (pos * lex) list * (restr * position list)
| LexPhraseMode of string * (pos * lex) list * (restr * position list)
(* Abbreviation together with a list of morfs. *)
| PhraseAbbr of phrase_abbr * morf list
(* Clause phrase head with its clause type and complementizer list. *)
| PhraseComp of phrase_comp * (comp_type * comp list)
| LexPhraseId of string * pos * lex
| LexArg of string * pos * string
(* | LexRealization of morf * string*)
| Raised of string list * direction * string list
| Multi of phrase list
(** Record of lexicalization details; [empty_lex] provides a value with
    every field at a neutral setting. *)
and lex_record = {
lex_argument: morf;
lex_arguments: morf list;
lex_lemma: lex;
lex_numeral_lemma: lex;
lex_negation: negation;
lex_degree: grad;
lex_number: number;
lex_reflex: refl;
lex_gender: gender;
lex_modification: restr * position list;
}
(* and mode = Mode of string * morf list | Pron of morf list | Misc *)
(** Attribute bundle carried by [Frame]; the constructor determines which
    attributes are present.  Every constructor except [ComprepAtrs] starts
    with a [string list] component.
    NOTE(review): the meaning of the leading string list and of the bare
    [string] components is not visible in this file — confirm at the
    construction sites. *)
type frame_atrs =
EmptyAtrs of string list
| DefaultAtrs of string list * refl * opinion * negation * pred * aspect
| ComprepAtrs of string
(* The trailing comments on the next two constructors are disabled extra
   components; they are written as plain comments so they are not picked
   up as documentation comments. *)
| NounAtrs of string list * string * nsem (* * string list *)
| AdjAtrs of string list * case * string (* * string * string list *)
| PersAtrs of string list * string * negation * mood * tense * aux * aspect
| GerAtrs of string list * string * negation * aspect
| NonPersAtrs of string list * string * string * string * negation * aspect
(** Frame: either a general [Frame] (attribute bundle plus positions) or one
    of two lexical forms, [LexFrame] and [ComprepFrame], which share the
    same payload shape: a string, a part of speech, a restriction and a
    position list.  The "R" variants below are disabled. *)
type frame =
Frame of frame_atrs * position list
| LexFrame of string * pos * restr * position list
| ComprepFrame of string * pos * restr * position list
(* | FrameR of frame_atrs * (string * string * string list * string list * morf list) list
| LexFrameR of string * pos * restr * (string * string * string list * string list * morf list) list
| ComprepFrameR of string * pos * restr * (string * string * string list * string list * morf list) list *)
(** Ordering on [phrase_abbr] values, in the shape expected by [Xmap.Make]. *)
module OrderedAbbr = struct
  type t = phrase_abbr

  (* Delegates to the [compare] already in scope: the binding is not
     recursive, so the right-hand side refers to the outer function. *)
  let compare lhs rhs = compare lhs rhs
end

(* module MorfSet = Xset.Make(OrderedMorf) *)

(** Maps keyed by [phrase_abbr]. *)
module AbbrMap = Xmap.Make (OrderedAbbr)
(** Ordering on [comp_type] values, in the shape expected by [Xmap.Make]. *)
module OrderedComp = struct
  type t = comp_type

  (* Delegates to the [compare] already in scope: the binding is not
     recursive, so the right-hand side refers to the outer function. *)
  let compare lhs rhs = compare lhs rhs
end

(* module MorfSet = Xset.Make(OrderedMorf) *)

(** Maps keyed by [comp_type]. *)
module CompMap = Xmap.Make (OrderedComp)
(** Root directory of the ENIAM resources: the value of the
    [ENIAM_RESOURCE_PATH] environment variable when it is set, otherwise
    ["/usr/share/eniam"]. *)
let resource_path =
  match Sys.getenv "ENIAM_RESOURCE_PATH" with
  | dir -> dir
  | exception Not_found -> "/usr/share/eniam"

(** Phrase-type realizations file, located under [resource_path]. *)
let realizations_filename =
  let relative = "/Walenty/phrase_types_expand_20150909.txt" in
  resource_path ^ relative

(** Location of the Walenty XML file: the value of the
    [WALENTY_LOCALIZATION] environment variable when it is set, otherwise
    ["/usr/share/walenty/walenty.xml"].  Unlike the other file names it is
    not derived from [resource_path]. *)
let walenty_filename =
  match Sys.getenv "WALENTY_LOCALIZATION" with
  | file -> file
  | exception Not_found -> "/usr/share/walenty/walenty.xml"

(* Lexeme list files, all located under [resource_path]. *)

let subst_uncountable_lexemes_filename =
  let relative = "/Walenty/subst_uncountable.dat" in
  resource_path ^ relative

let subst_uncountable_lexemes_filename2 =
  let relative = "/Walenty/subst_uncountable_stare.dat" in
  resource_path ^ relative

let subst_container_lexemes_filename =
  let relative = "/Walenty/subst_container.dat" in
  resource_path ^ relative

let subst_numeral_lexemes_filename =
  let relative = "/Walenty/subst_numeral.dat" in
  resource_path ^ relative

let subst_time_lexemes_filename =
  let relative = "/Walenty/subst_time.dat" in
  resource_path ^ relative
(** Position with neutral contents: [empty_id] as identifier, [ARG] as
    grammatical function, [Both] as direction, empty strings and empty
    lists everywhere else. *)
let empty_position : position = {
  psn_id = empty_id;
  gf = ARG;
  role = "";
  role_attr = "";
  sel_prefs = [];
  cr = [];
  ce = [];
  dir = Both;
  morfs = [];
}
(** Lexicalization record with neutral contents: [Phrase Null] as the
    argument, empty lemmas, every grammatical attribute at its "Undef"
    constructor and a [Natr] modification without positions. *)
let empty_lex : lex_record = {
  lex_argument = Phrase Null;
  lex_arguments = [];
  lex_lemma = Lexeme "";
  lex_numeral_lemma = Lexeme "";
  lex_negation = NegationUndef;
  lex_degree = GradUndef;
  lex_number = NumberUndef;
  lex_reflex = ReflUndef;
  lex_gender = GenderUndef;
  lex_modification = (Natr, []);
}
(** Schema record: an identifier, the schema-level attributes ([opinion],
    [reflexiveMark], [aspect], [negativity], [predicativity]), the list of
    [positions] and a textual representation ([text_rep]).
    Note that the label [opinion] is declared again by later record types
    in this file; for unannotated field access OCaml resolves a label to
    its most recent declaration. *)
type schema = {sch_id: id; opinion: opinion; reflexiveMark: refl; aspect: aspect;
negativity: negation; predicativity: pred; positions: position list; text_rep: string}
(** Example record: its own identifier, the identifier of a meaning, the
    identifiers of phrases, the [sentence] and its [source] as strings, an
    [opinion] and a free-form [note].
    The labels [opinion] and [phrases] are also declared by other record
    types in this file; use a type annotation when the intended record is
    not the most recently declared one. *)
type example = {exm_id: id;
meaning: id;
phrases: id list;
sentence: string;
source: string;
opinion: opinion;
note: string}
(** One selectional-preference item as stored in [argument.sel_prefs]: a
    number, a symbol name, or a named relation pointing at an [id].
    The type name coincides with the [sel_prefs] record labels of
    [position] and [argument]; types and labels live in separate
    namespaces, so this is legal.
    NOTE(review): what [NumericP]'s integer identifies is not visible
    here — confirm. *)
type sel_prefs =
NumericP of int
| SymbolP of string
| RelationP of string * id
(** Argument of a [frame2]: identifier, role and role attribute as strings,
    and selectional preferences as a list of lists of [sel_prefs] items.
    The labels [role] and [sel_prefs] are also declared by [position]; from
    this point on an unannotated use of either label resolves to
    [argument].
    NOTE(review): how the outer and inner lists of [sel_prefs] combine
    (alternatives vs. conjunction) is not visible here — confirm. *)
type argument = {arg_id: id;
role: string;
role_attribute: string;
sel_prefs: sel_prefs list list}
(** Frame record used by [entry.frames]: identifier, an opinion, the
    identifiers of the associated meanings and the list of arguments.
    Unlike [schema] and [example], [opinion] is a plain [string] here, not
    a value of the [opinion] variant. *)
type frame2 = {frm_id: id;
opinion: string;
meanings: id list;
arguments: argument list}
(** Meaning record: identifier, [name] and [variant] as strings, an integer
    [plwnluid] and a [gloss].  [empty_meaning] uses [-1] for [plwnluid].
    NOTE(review): [plwnluid] is presumably a plWordNet lexical-unit id —
    confirm. *)
type meaning = {mng_id: id;
name: string;
variant: string;
plwnluid: int;
gloss: string}
(** Meaning with neutral contents: [empty_id] as identifier, empty strings
    and [-1] as the [plwnluid] placeholder. *)
let empty_meaning : meaning = {
  mng_id = empty_id;
  name = "";
  variant = "";
  plwnluid = -1;
  gloss = "";
}
(** Connection between one argument (by identifier) and a list of phrase
    identifiers.  The label [phrases] is also declared by [example]. *)
type connection = {argument: id;
phrases: id list}
(** An alternation: a list of connections. *)
type alternation = {connections: connection list}
(** Dictionary entry: identifier, [status], the form's orthography and part
    of speech as strings, followed by the lists of schemata, examples,
    frames, meanings and alternations attached to the entry.
    The label [meanings] is also declared by [frame2], where it holds
    identifiers rather than full [meaning] records. *)
type entry = {ent_id: id;
status: string;
form_orth: string;
form_pos: string;
schemata: schema list;
examples: example list;
frames: frame2 list;
meanings: meaning list;
alternations: alternation list}
(** Entry with neutral contents: [empty_id] as identifier, empty strings
    and empty lists for every other field. *)
let empty_entry : entry = {
  ent_id = empty_id;
  status = "";
  form_orth = "";
  form_pos = "";
  schemata = [];
  examples = [];
  frames = [];
  meanings = [];
  alternations = [];
}