(*
 *  ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)
|
|
20
|
open ENIAMwalTypes
|
|
21
22
|
(** [token t] renders a single token as a string.
    Grouping constructors ([Paren], [Bracet], [SqBra]) render their inner
    tokens recursively, with no separator, between the matching delimiters;
    every other constructor maps to a fixed piece of text. *)
let rec token tok =
  (* Render a nested token list back-to-back, without any separator. *)
  let inner toks = String.concat "" (Xlist.map toks token) in
  match tok with
  | Text s -> s
  | Paren l -> "(" ^ inner l ^ ")"
  | Bracet l -> "{" ^ inner l ^ "}"
  | SqBra l -> "[" ^ inner l ^ "]"
  | LParen -> "("
  | RParen -> ")"
  | LBracet -> "{"
  | RBracet -> "}"
  | LSqBra -> "["
  | RSqBra -> "]"
  | Semic -> ";"
  | Plus -> "+"
  | Comma -> ","
  | Quot -> "'"
(** [token_list l] renders every token of [l] with [token] and joins the
    results without a separator. *)
let token_list l =
  let rendered = Xlist.map l token in
  String.concat "" rendered
|
|
40
|
|
|
41
42
43
44
45
46
47
48
|
(** [opinion o] returns the textual label of the opinion constructor [o]. *)
let opinion o =
  match o with
  | Pewny -> "pewny"
  | Potoczny -> "potoczny"
  | Watpliwy -> "wątpliwy"
  | Archaiczny -> "archaiczny"
  | Zly -> "zły"
  | Wulgarny -> "wulgarny"
  | Domyslny -> "domyślny"
|
|
49
|
|
|
50
51
52
53
54
|
(** [negation n] returns the textual label of the negation value [n].
    [NegationNA] is rendered as the empty string. *)
let negation n =
  match n with
  | Negation -> "neg"
  | Aff -> "aff"
  | NegationUndef -> "_"
  | NegationNA -> ""
|
|
55
|
|
|
56
|
(** [pred p] returns the textual label of the predicativity value [p].
    [PredNA] is rendered as the empty string. *)
let pred p =
  match p with
  | PredNA -> ""
  | Pred -> "pred"
|
|
59
|
|
|
60
61
62
63
|
(** [aspect a] renders an aspect value: the carried string for [Aspect],
    an underscore for [AspectUndef] and the empty string for [AspectNA]. *)
let aspect a =
  match a with
  | Aspect s -> s
  | AspectUndef -> "_"
  | AspectNA -> ""
|
|
64
|
|
|
65
66
67
68
69
70
71
72
73
74
75
76
|
(** [case c] renders a case value: the carried string for [Case], a fixed
    label for every other constructor. *)
let case c =
  match c with
  | Case s -> s
  | Str -> "str"
  | Part -> "part"
  | CaseAgr -> "agr"
  | CaseUAgr -> "uagr"
  | NomAgr -> "nomagr"
  | GenAgr -> "genagr"
  | AllAgr -> "allagr"
  | AllUAgr -> "alluagr"
  | CaseUndef -> "_"
|
|
77
|
(** [comp c] renders a complementizer value: the carried string for [Comp],
    a fixed label for every other constructor. *)
let comp c =
  match c with
  | Comp s -> s
  | Zeby -> "żeby2"
  | Gdy -> "gdy"
  | CompUndef -> "_"
(** [comp_type ct] returns the textual label of the complementizer type
    [ct]. *)
let comp_type ct =
  match ct with
  | Int -> "int"
  | Rel -> "rel"
  | Sub -> "sub"
  | Coord -> "coord"
  | CompTypeUndef -> "_"
  | CompTypeAgr -> "agr"
|
|
90
|
|
|
91
92
93
94
|
(** [number n] renders a grammatical number value: the carried string for
    [Number], a fixed label otherwise. *)
let number n =
  match n with
  | Number s -> s
  | NumberAgr -> "agr"
  | NumberUndef -> "_"
|
|
95
|
|
|
96
97
98
99
100
|
(** [gender g] renders a gender value. [Gender] yields its carried string,
    [Genders] joins its list of strings with a dot, and the remaining
    constructors map to fixed labels. *)
let gender g =
  match g with
  | Gender s -> s
  | GenderUndef -> "_"
  | GenderAgr -> "agr"
  | Genders names -> String.concat "." names
|
|
101
|
|
|
102
103
104
|
(** [grad g] renders a grade value: the carried string for [Grad], an
    underscore for [GradUndef]. *)
let grad g =
  match g with
  | Grad s -> s
  | GradUndef -> "_"
|
|
105
|
|
|
106
107
108
|
(** [refl r] renders a reflexivity value; [ReflEmpty] yields the empty
    string. *)
let refl r =
  match r with
  | ReflEmpty -> ""
  | ReflSie -> "się"
|
|
109
|
|
|
110
111
112
|
(** [acm a] renders an accommodability value: the carried string for [Acm],
    an underscore for [AcmUndef]. *)
let acm a =
  match a with
  | Acm s -> s
  | AcmUndef -> "_"
|
|
113
|
|
|
114
115
116
|
(** [sem s] returns the textual label of the semantic flag [s]. *)
let sem s =
  match s with
  | Sem -> "sem"
  | NoSem -> "nosem"
|
|
117
118
|
(*let req = function
    Req -> ",req"
  | NReq -> ",nreq"
  | ReqUndef -> ""*)
|
|
122
|
|
|
123
124
125
126
127
128
129
130
131
132
133
|
(** [gf f] returns the textual label of the grammatical function [f]. *)
let gf f =
  match f with
  | SUBJ -> "subj"
  | OBJ -> "obj"
  | ARG -> "arg" (* an earlier variant rendered this as "" *)
  | CORE -> "core"
  | NOSEM -> "nosem"
  | NOGF -> "nogf"
  | ADJUNCT -> "adjunct"
  | RAISED -> "raised"
  | CLAUSE -> "clause"
  | SENTENCE -> "sentence"
|
|
134
|
|
|
135
136
137
138
139
140
141
142
143
144
145
146
147
148
|
(** [pos p] renders a part-of-speech value as an upper-case tag followed,
    for constructors that carry arguments, by a parenthesised,
    comma-separated list of the rendered arguments. *)
let pos p =
  (* [tagged name args] builds name(arg1,arg2,...). *)
  let tagged name args = name ^ "(" ^ String.concat "," args ^ ")" in
  match p with
  | SUBST (n, c) -> tagged "SUBST" [number n; case c]
  | PREP c -> tagged "PREP" [case c]
  | NUM (c, g, a) -> tagged "NUM" [case c; gender g; acm a]
  | ADJ (n, c, g, gr) -> tagged "ADJ" [number n; case c; gender g; grad gr]
  | ADV gr -> tagged "ADV" [grad gr]
  | GER (n, c, g, a, neg, r) ->
    tagged "GER" [number n; case c; gender g; aspect a; negation neg; refl r]
  | PACT (n, c, g, a, neg, r) ->
    tagged "PACT" [number n; case c; gender g; aspect a; negation neg; refl r]
  | PPAS (n, c, g, a, neg) ->
    tagged "PPAS" [number n; case c; gender g; aspect a; negation neg]
  | INF (a, n, r) -> tagged "INF" [aspect a; negation n; refl r]
  | QUB -> "QUB"
  | COMPAR -> "COMPAR"
  | COMP c -> tagged "COMP" [comp_type c]
  | PERS (n, r) -> tagged "PERS" [negation n; refl r]
|
|
149
|
|
|
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
|
(** [phrase ph] renders a phrase type as a lower-case name followed, for
    constructors that carry arguments, by a parenthesised, comma-separated
    list of the rendered arguments. *)
let phrase ph =
  (* [form name args] builds name(arg1,arg2,...). *)
  let form name args = name ^ "(" ^ String.concat "," args ^ ")" in
  match ph with
  | NP c -> form "np" [case c]
  | PrepNP (s, prep, c) -> form "prepnp" [sem s; prep; case c]
  | AdjP c -> form "adjp" [case c]
  | PrepAdjP (s, prep, c) -> form "prepadjp" [sem s; prep; case c]
  | NumP c -> form "nump" [case c]
  | PrepNumP (s, prep, c) -> form "prepnump" [sem s; prep; case c]
  | ComprepNP (s, prep) -> form "comprepnp" [sem s; prep]
  | ComparNP (s, prep, c) -> form "comparnp" [sem s; prep; case c]
  | ComparPP (s, prep) -> form "comparpp" [sem s; prep]
  | IP -> "ip"
  | CP (ct, co) -> form "cp" [comp_type ct; comp co]
  | NCP (c, ct, co) -> form "ncp" [case c; comp_type ct; comp co]
  | PrepNCP (s, prep, c, ct, co) ->
    form "prepncp" [sem s; prep; case c; comp_type ct; comp co]
  | InfP a -> form "infp" [aspect a] (* a second req argument is disabled *)
  | PadvP -> "padvp"
  | AdvP -> "advp"
  | FixedP s -> form "fixed" [s]
  | PrepP -> "prepp"
  | Prep (prep, c) -> form "prep" [prep; case c]
  | Num (c, a) -> form "num" [case c; acm a]
  | Measure c -> form "measure" [case c]
  | Or -> "or"
  (* | Refl -> "refl" *)
  (* | Recip -> "recip" *)
  | Qub -> "qub"
  | Inclusion -> "inclusion"
  | Adja -> "adja"
  | AuxPast -> "aux-past"
  | AuxFut -> "aux-fut"
  | AuxImp -> "aux-imp"
  | Aglt -> "aglt"
  | Pro -> "pro"
  | ProNG -> "prong"
  | Null -> "null"
  | X -> "x"
  | Lex s -> form "lex" [s]
|
|
187
|
|
|
188
189
190
191
192
193
194
|
(** [phrase_abbr a] renders a phrase abbreviation. Constructors carrying a
    string wrap it in parentheses after their name; the others map to a
    fixed label. *)
let phrase_abbr a =
  let wrap name arg = name ^ "(" ^ arg ^ ")" in
  match a with
  | Xp m -> wrap "xp" m
  | Advp m -> wrap "advp" m
  | ComparP prep -> wrap "compar" prep
  | Nonch -> "nonch"
  | Distrp -> "distrp"
  | Possp -> "possp"
|
|
195
|
|
|
196
197
198
199
|
(** [phrase_comp pc] renders the head of a clausal phrase: a bare name for
    [Cp], and a name with parenthesised, comma-separated arguments for
    [Ncp] and [Prepncp]. *)
let phrase_comp pc =
  match pc with
  | Cp -> "cp"
  | Ncp c -> String.concat "" ["ncp("; case c; ")"]
  | Prepncp (prep, c) -> String.concat "" ["prepncp("; prep; ","; case c; ")"]
|
|
200
|
|
|
201
202
203
204
205
206
|
(** [lex l] renders a lexical specification. A single [Lexeme] is wrapped
    in apostrophes; the list constructors render their members recursively
    inside an operator name with parentheses. [ORconcat] and [ORcoord] share
    the same operator name and differ only in the separator (comma versus
    semicolon).
    NOTE(review): the [Elexeme] rendering starts with an apostrophe but does
    not end with one, unlike [Lexeme] - confirm this is intended. *)
let rec lex l =
  (* Render members recursively and join them with [sep] inside op(...). *)
  let group op sep members =
    op ^ "(" ^ String.concat sep (Xlist.map members lex) ^ ")" in
  match l with
  | Lexeme s -> "'" ^ s ^ "'"
  | ORconcat members -> group "OR" "," members
  | ORcoord members -> group "OR" ";" members
  | XOR members -> group "XOR" "," members
  | Elexeme g -> "'E(" ^ gender g ^ ")"
|
|
207
208
|
(** [restr r] returns the textual label of the restriction value [r];
    [NoRestr] yields the empty string. *)
let restr r =
  match r with
  | Natr -> "natr"
  | Atr -> "atr"
  | Ratr -> "ratr"
  | Ratrs -> "ratrs"
  | Atr1 -> "atr1"
  | Ratr1 -> "ratr1"
  | NoRestr -> ""
(* Disabled variants that also rendered an attached schema:
  | Ratr1,s -> "ratr1(" ^ schema s ^ ")"
  | Atr1,s -> "atr1(" ^ schema s ^ ")"
  | Ratr,s -> "ratr(" ^ schema s ^ ")"
  | Atr,s -> "atr(" ^ schema s ^ ")"
  | Ratrs,s -> "ratrs(" ^ schema s ^ ")"
  | NoRestr,s -> schema s *)
|
|
222
|
|
|
223
224
225
226
|
(** [controllers l] maps every string [n] of [l] to a controller label:
    the bare word for ["1"], the word followed by [n] otherwise. *)
let controllers l =
  let label n =
    match n with
    | "1" -> "controller"
    | _ -> "controller" ^ n in
  Xlist.map l label
|
|
227
|
|
|
228
229
230
231
232
|
(** [controllees l] maps every string [n] of [l] to a controllee label:
    the bare word for ["1"], the word followed by [n] otherwise. *)
let controllees l =
  let label n =
    match n with
    | "1" -> "controllee"
    | _ -> "controllee" ^ n in
  Xlist.map l label
|
|
233
|
(*let lex_specs = function
    NSpecs num -> number num
  | AdvSpecs gr -> grad gr
  | AdjSpecs(num,g,gr) -> number num ^ "," ^ gender g ^ "," ^ grad gr
  | PpasSpecs(num,g,neg) -> number num ^ "," ^ gender g ^ "," ^ negation neg
  | PactSpecs(num,g,neg,r) -> number num ^ "," ^ gender g ^ "," ^ negation neg ^ "," ^ refl r
  | GerSpecs(num,neg,r) -> number num ^ "," ^ negation neg ^ "," ^ refl r
  | CSpecs(neg,r) -> negation neg ^ "," ^ refl r
  | NumSpecs g -> gender g
  | EmptySpecs -> ""*)
|
|
243
|
|
|
244
245
246
|
(** [mood m] returns [m] unchanged. The constructor-based variant
    (Mood / MoodUndef) is disabled, so the value is passed through as is. *)
let mood m = m
|
|
247
|
|
|
248
249
250
251
252
253
254
|
(** [tense t] returns [t] unchanged. *)
let tense = fun value -> value
(** [aux a] returns the textual label of the auxiliary value [a]; [NoAux]
    is rendered as a single dash. *)
let aux a =
  match a with
  | NoAux -> "-"
  | PastAux -> "aux-past"
  | FutAux -> "aux-fut"
  | ImpAux -> "aux-imp"
|
|
255
|
|
|
256
257
258
|
(** [nsem s] renders a noun semantics value: the carried string for
    [Common], a fixed label for [Time]. *)
let nsem s =
  match s with
  | Common name -> name
  | Time -> "time"
|
|
259
|
|
|
260
261
262
263
264
|
(** [direction d] returns the one-character symbol of the direction [d]:
    a slash for [Forward], a backslash for [Backward] and a vertical bar
    for [Both]. *)
let direction d =
  match d with
  | Forward -> "/"
  | Backward -> "\\"
  | Both -> "|"
|
|
265
266
|
(** [schema positions] renders a list of schema positions joined with a
    plus sign. Each position is printed as a comma-separated header,
    followed by the direction symbol and the position's morfs joined with
    semicolons inside curly braces. The header holds, in order: the
    grammatical function (omitted when it is [ARG]), the role and role
    attribute (each omitted when empty), the selectional preferences, the
    controllers and the controllees. *)
let rec schema positions =
  let position s =
    let gf_part = if s.gf = ARG then [] else [gf s.gf] in
    let role_part = if s.role = "" then [] else [s.role] in
    let attr_part = if s.role_attr = "" then [] else [s.role_attr] in
    let header =
      gf_part @ role_part @ attr_part @ s.sel_prefs
      @ controllers s.cr @ controllees s.ce in
    let alternatives = String.concat ";" (Xlist.map s.morfs morf) in
    String.concat "," header ^ direction s.dir ^ "{" ^ alternatives ^ "}" in
  String.concat "+" (Xlist.map positions position)

(*and schema_role schema =
  String.concat "+" (Xlist.map schema (fun (r,role,cr,ce,morfs) ->
    String.concat "," ((if r = "" then [] else [r])@(if role = "" then [] else [role])@(controllers cr)@(controllees ce)) ^ "{" ^ String.concat ";" (Xlist.map morfs morf) ^ "}")) *)

(** [morf m] renders a single morf. It is mutually recursive with [schema]
    because lexicalised phrases embed a schema and abbreviations embed a
    list of morfs. *)
and morf m =
  (* Semicolon-separated list of pos,lex pairs. *)
  let lex_pairs pairs =
    String.concat ";"
      (Xlist.map pairs (fun (p, le) -> pos p ^ "," ^ lex le)) in
  (* Common ending of both lexicalised forms: restriction plus schema. *)
  let lex_tail (r, s) = "]," ^ restr r ^ "[" ^ schema s ^ "])" in
  match m with
  | Phrase p -> phrase p
  | E p -> "E(" ^ phrase p ^ ")"
  | LexPhrase (pos_lex, rs) -> "lex([" ^ lex_pairs pos_lex ^ lex_tail rs
  | LexPhraseMode (mode, pos_lex, rs) ->
    "lex([" ^ mode ^ "," ^ lex_pairs pos_lex ^ lex_tail rs
  | PhraseAbbr (p, ml) ->
    phrase_abbr p ^ "[" ^ String.concat ";" (Xlist.map ml morf) ^ "]"
  | PhraseComp (p, (ct, l)) ->
    phrase_comp p ^ "," ^ comp_type ct
    ^ "[" ^ String.concat ";" (Xlist.map l comp) ^ "]"
  | LexPhraseId (id, p, le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")"
  | LexArg (id, p, le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ le ^ ")"
  (* | LexRealization(mrf,le) -> "lex(" ^ morf mrf ^ "," ^ le ^ ")"*)
  | Raised (mrf1, dir, mrf2) ->
    "raised([" ^ String.concat ";" mrf1 ^ "]," ^ direction dir
    ^ "[" ^ String.concat ";" mrf2 ^ "])"
  | Multi l -> "multi(" ^ String.concat ";" (Xlist.map l phrase) ^ ")"
|
|
289
|
|
|
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
|
(*and mode = function
Mode(s,[]) -> s
| Mode(s,l) -> s ^ "[" ^ "..."(*String.concat ";" (Xlist.map l morf)*) ^ "]"
(* | Mod l -> "mod[...]" *)
| Pron [] -> "pron"
| Pron l -> "pron" ^ "[" ^ "..."(*String.concat ";" (Xlist.map l morf)*) ^ "]"
| Misc -> "misc"*)
(** [frame_atrs atrs] renders frame attributes as fields separated by a
    colon and a space. The leading string list of every constructor is
    joined with a semicolon and a space. In [NonPersAtrs] the role and its
    attribute share a single field, separated by a comma.
    @raise Failure for any constructor not listed here. *)
let frame_atrs atrs =
  let joined m = String.concat "; " m in
  let fields parts = String.concat ": " parts in
  match atrs with
  | DefaultAtrs (m, r, o, neg, p, a) ->
    fields [joined m; refl r; opinion o; negation neg; pred p; aspect a]
  | EmptyAtrs m -> joined m
  | NounAtrs (m, nsyn, s) -> fields [joined m; nsyn; nsem s]
  | AdjAtrs (m, c, adjsyn) -> fields [joined m; case c; adjsyn]
  | PersAtrs (m, le, neg, mo, t, au, a) ->
    fields [joined m; le; negation neg; mood mo; tense t; aux au; aspect a]
  | GerAtrs (m, le, neg, a) -> fields [joined m; le; negation neg; aspect a]
  | NonPersAtrs (m, le, role, role_attr, neg, a) ->
    fields [joined m; le; role ^ "," ^ role_attr; negation neg; aspect a]
  | _ -> failwith "WalStringOf.frame_atrs"
|
|
307
|
|
|
308
|
(** [frame lexeme f] renders the frame [f] as [lexeme] followed by the
    frame's fields, all separated by a colon and a space. A [Frame]
    contributes its attributes and schema; [LexFrame] and [ComprepFrame]
    contribute their leading string, part of speech, restriction and
    schema. *)
let frame lexeme fr =
  let fields =
    match fr with
    | Frame (atrs, s) -> [frame_atrs atrs; schema s]
    | LexFrame (id, p, r, s) -> [id; pos p; restr r; schema s]
    | ComprepFrame (le, p, r, s) -> [le; pos p; restr r; schema s] in
  String.concat ": " (lexeme :: fields)
(*  | FrameR(atrs,s) ->
      Printf.sprintf "%s: %s: %s" lexeme (frame_atrs atrs) (schema_role s)
  | LexFrameR(id,p,r,s) ->
      Printf.sprintf "%s: %s: %s: %s: %s" lexeme id (pos p) (restr r) (schema_role s)
  | ComprepFrameR(le,p,r,s) ->
      Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema_role s)*)
(*  | _ -> failwith "WalStringOf.frame" *)
(** [fnum_frame lexeme (fnum, f)] renders a numbered frame: the decimal
    frame number, then [lexeme], then the frame's fields, all separated by
    a colon and a space. The per-constructor fields are the same as those
    printed by [frame]. *)
let fnum_frame lexeme (fnum, fr) =
  let fields =
    match fr with
    | Frame (atrs, s) -> [frame_atrs atrs; schema s]
    | LexFrame (id, p, r, s) -> [id; pos p; restr r; schema s]
    | ComprepFrame (le, p, r, s) -> [le; pos p; restr r; schema s] in
  String.concat ": " (string_of_int fnum :: lexeme :: fields)
|
|
330
|
|
|
331
|
(** [unparsed_frame lexeme (r, o, neg, p, a, s)] renders a frame whose six
    fields are still raw strings: [lexeme], a single space, then the six
    fields separated by a colon and a space. *)
let unparsed_frame lexeme (r, o, neg, p, a, s) =
  Printf.sprintf "%s %s: %s: %s: %s: %s: %s" lexeme r o neg p a s
|