(*
 *  ENIAMwalenty, a converter for Polish Valence Dictionary "Walenty".
 *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Maciej Hołubowicz
 *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)
Wojciech Jaworski authored
21
open ENIAMwalTypes
Wojciech Jaworski authored
22
23
24
25
26
open Xstd

(** Decomposed identifier as produced by [parse_id]: [hash] records a leading
    ['#'], [numbers] holds the dot-separated integers and [suffix] the text
    following the ['-']. *)
type id = {
  hash : bool;
  suffix : string;
  numbers : int list;
}

(** Identifier that [parse_id] returns for the empty string. *)
let empty_id = {numbers = []; suffix = ""; hash = false}
Wojciech Jaworski authored
27
28
29
30
31
32

(** Parses an identifier of the form [[#]wal_N.N...-suffix] into an [id].
    The empty string yields [empty_id].  Fails (via [failwith]) when the
    string is shorter than 6 characters, lacks the ["wal_"] prefix, does not
    split into exactly two parts on ['-'], or when the numeric part cannot be
    converted. *)
let parse_id s =
  match String.length s with
  | 0 -> empty_id
  | len when len < 6 -> failwith "za krótkie id"
  | len ->
    let hash = s.[0] = '#' in
    (* drop the optional leading '#' *)
    let body = if hash then String.sub s 1 (len - 1) else s in
    if String.sub body 0 4 <> "wal_" then failwith "id nie ma wal" else
    let rest = String.sub body 4 (String.length body - 4) in
    let nums, suf =
      match Str.split (Str.regexp "-") rest with
      | [nums; suf] -> nums, suf
      | _ -> failwith ("parse_id: zła ilość '-' " ^ rest) in
    let numbers =
      try Xlist.map (Xstring.split "\\." nums) int_of_string
      with _ -> failwith ("parse_id: " ^ nums) in
    {hash; suffix = suf; numbers}
Wojciech Jaworski authored
40
41
42
(** Renders an [id] as text: optional ['#'], then ["wal_"], the numbers
    joined with ['.'], a ['-'] and the suffix. *)
let string_of_id id =
  let prefix = if id.hash then "#" else "" in
  let dotted = String.concat "." (Xlist.map id.numbers string_of_int) in
  prefix ^ "wal_" ^ dotted ^ "-" ^ id.suffix
Wojciech Jaworski authored
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
(** Intermediate tree built by [parse_tei] from the XML feature structures. *)
type tei =
  | Symbol of string            (* <symbol value=...> *)
  | TEIstring of string         (* <string> contents *)
  | Binary of bool              (* <binary value=...> *)
  | Numeric of int              (* <numeric value=...> *)
  | F of string * tei           (* <f name=...> with a single child *)
  | Fset of string * tei list   (* <f name=...> with zero or several children *)
  | Fs of string * tei list     (* <fs type=...> *)
  | Id of id                    (* xml:id of an <fs>, stored as its first child *)
  | SameAs of id * string       (* <fs sameAs=...>, with its type or "" *)

(** Debug rendering of a [tei] tree; used to build the error messages raised
    by the loaders in this file. *)
let rec tei_to_string = function
  | Symbol s -> Printf.sprintf "Symbol %s" s
  | TEIstring s -> Printf.sprintf "String %s" s
  | Binary b -> Printf.sprintf "Binary %s" (string_of_bool b)
  | Numeric n -> Printf.sprintf "Numeric %d" n
  | F(s,t) -> Printf.sprintf "F(%s,%s)" s (tei_to_string t)
  | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
  | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
  | Id id -> Printf.sprintf "Id(%s)" (string_of_id id)
  (* Previously printed as "F(Id,...)" and dropped the referenced id. *)
  | SameAs(id,s) -> Printf.sprintf "SameAs(%s,%s)" (string_of_id id) s

(** Converts an XML node into a [tei] value.  Any element shape not listed
    below fails with ["parse_tei: "] followed by the formatted XML. *)
let rec parse_tei node =
  (* the two list conversions are kept distinct, exactly as written originally *)
  let via_map children = List.rev (Xlist.map children parse_tei) in
  let via_rev_map children = List.rev (Xlist.rev_map children parse_tei) in
  match node with
  | Xml.Element("f", ["name", name], [Xml.Element("vColl", ["org", "set"], members)]) ->
    Fset(name, via_map members)
  | Xml.Element("f", ["name", name], []) -> Fset(name, [])
  | Xml.Element("f", ["name", name], [child]) -> F(name, parse_tei child)
  | Xml.Element("f", ["name", name], members) -> Fset(name, via_map members)
  | Xml.Element("fs", ["type", name], children) -> Fs(name, via_rev_map children)
  | Xml.Element("fs", ["xml:id", id; "type", name], children) ->
    (* the element's own id is stored as the first child *)
    Fs(name, Id (parse_id id) :: via_rev_map children)
  | Xml.Element("symbol", ["value", value], []) -> Symbol value
  | Xml.Element("string", [], [Xml.PCData text]) -> TEIstring text
  | Xml.Element("string", [], []) -> TEIstring ""
  | Xml.Element("binary", ["value", value], []) ->
    Binary (try bool_of_string value with _ -> failwith "parse_tei")
  | Xml.Element("numeric", ["value", value], []) ->
    Numeric (try int_of_string value with _ -> failwith "parse_tei")
  | Xml.Element("fs", ["sameAs", same_as; "type", name], []) -> SameAs(parse_id same_as, name)
  | Xml.Element("fs", ["sameAs", same_as], []) -> SameAs(parse_id same_as, "")
  | _ -> failwith ("parse_tei: " ^ Xml.to_string_fmt node)

(** Maps the grammatical-function symbol (["subj"] or ["obj"]) to its
    constructor; any other string fails with ["parse_gf: ..."]. *)
let parse_gf s =
  match s with
  | "subj" -> SUBJ
  | "obj" -> OBJ
  | _ -> failwith ("parse_gf: " ^ s)

(** Prepends a control index to the [cr] (controller) or [ce] (controllee)
    list of [arg], depending on the given symbol; unknown symbols fail with
    ["parse_control: ..."]. *)
let parse_control arg s =
  let controller index = {arg with cr = index :: arg.cr} in
  let controllee index = {arg with ce = index :: arg.ce} in
  match s with
  | "controller" -> controller "1"
  | "controllee" -> controllee "1"
  | "controller2" -> controller "2"
  | "controllee2" -> controllee "2"
  | _ -> failwith ("parse_control: " ^ s)

(** Maps a case symbol to its constructor; ["str"], ["part"] and ["agr"]
    have dedicated constructors, the remaining known symbols are wrapped in
    [Case].  Unknown symbols fail with ["parse_case: ..."]. *)
let parse_case s =
  match s with
  | "str" -> Str
  | "part" -> Part
  | "agr" -> CaseAgr
  | "nom" | "gen" | "dat" | "acc" | "inst" | "loc" | "pred" | "postp" -> Case s
  | _ -> failwith ("parse_case: " ^ s)

(** Maps an aspect symbol to its constructor: ["_"] is [AspectUndef], the
    empty string is [AspectNA]; unknown symbols fail with
    ["parse_aspect: ..."]. *)
let parse_aspect s =
  match s with
  | "perf" | "imperf" -> Aspect s
  | "_" -> AspectUndef
  | "" -> AspectNA
  | _ -> failwith ("parse_aspect: " ^ s)

(** Maps a negation symbol to its constructor: ["_"] is [NegationUndef], the
    empty string is [NegationNA]; unknown symbols fail with
    ["parse_negation: ..."]. *)
let parse_negation s =
  match s with
  | "neg" -> Negation
  | "aff" -> Aff
  | "_" -> NegationUndef
  | "" -> NegationNA
  | _ -> failwith ("parse_negation: " ^ s)

(** Maps a number symbol to its constructor; unknown symbols fail with
    ["parse_number: ..."]. *)
let parse_number s =
  match s with
  | "sg" | "pl" -> Number s
  | "agr" -> NumberAgr
  | "_" -> NumberUndef
  | _ -> failwith ("parse_number: " ^ s)

(** Maps a gender symbol to its constructor; unknown symbols fail with
    ["parse_gender: ..."]. *)
let parse_gender s =
  match s with
  (* for "n" the original keeps a commented-out alternative: Genders["n1";"n2"] *)
  | "m1" | "m3" | "n" | "f" -> Gender s
  (* commented-out alternative in the original: "n1";"n2" in place of "n" *)
  | "m1.n" -> Genders ["m1"; "n"]
  | "_" -> GenderUndef
  | "agr" -> GenderAgr
  | _ -> failwith ("parse_gender: " ^ s)

(** Maps a degree symbol to its constructor; unknown symbols fail with
    ["parse_grad: ..."]. *)
let parse_grad s =
  match s with
  | "pos" | "com" | "sup" -> Grad s
  | "_" -> GradUndef
  | _ -> failwith ("parse_grad: " ^ s)

(** Maps a modification-restriction symbol to its constructor; unknown
    symbols fail with ["parse_restr: ..."].  The function is not recursive,
    so the former [rec] keyword has been dropped. *)
let parse_restr = function
  | "natr" -> Natr
  | "atr" -> Atr
  | "ratr" -> Ratr
  | "atr1" -> Atr1
  | "ratr1" -> Ratr1
  | s -> failwith ("parse_restr: " ^ s)


(** Maps a complementizer symbol to a pair (clause type, complementizer list).
    ["int"] and ["rel"] give a clause type with no complementizer; every other
    known symbol gives [CompTypeUndef] with one complementizer.  Unknown
    symbols fail with ["parse_comp: ..."].  The trailing notes on each symbol
    are the part-of-speech annotations carried over from the original. *)
let parse_comp s =
  match s with
  | "int" -> Int, []
  | "rel" -> Rel, []
  | "gdy" -> CompTypeUndef, [Gdy] (* adv; gdyby: qub comp *)
  | "żeby2" -> CompTypeUndef, [Zeby]
  | "co" (* subst qub prep comp *)
  | "kto" (* subst *)
  | "ile" (* num adv *)
  | "jaki" (* adj *)
  | "który" (* adj *)
  | "czyj" (* adj *)
  | "jak" (* prep conj adv *)
  | "kiedy" (* comp adv *)
  | "gdzie" (* qub adv *)
  | "odkąd" (* adv *)
  | "skąd" (* adv *)
  | "dokąd" (* adv *)
  | "którędy" (* adv *)
  | "dlaczego" (* adv *)
  | "czemu" (* adv *)
  | "czy" (* qub conj *)
  | "jakby" (* qub comp *)
  | "jakoby" (* qub comp *)
  | "dopóki" (* comp *)
  | "zanim" (* comp *)
  | "jeśli" (* comp *)
  | "żeby" (* qub comp *)
  | "że" (* qub comp *)
  | "aż" (* qub comp *)
  | "bo" (* qub comp *) -> CompTypeUndef, [Comp s]
  | _ -> failwith ("parse_comp: " ^ s)

(** Reads one element of a clause-type ["constraints"] set: a [Symbol] whose
    [parse_comp] result is [CompTypeUndef] with exactly one complementizer,
    which is returned.  Anything else fails. *)
let load_type_constrains tei =
  match tei with
  | Symbol value ->
    begin match parse_comp value with
      | CompTypeUndef, [c] -> c
      | _ -> failwith "load_type_constrains"
    end
  | _ -> failwith ("load_type_constrains:\n " ^ tei_to_string tei)

(** Reads a ["type"] feature holding a ["type_def"] structure and returns the
    pair (clause type, complementizers).  Without constraints the result is
    that of [parse_comp]; with constraints the conjunction must yield a
    defined clause type with no complementizer, and the complementizers come
    from the constraint set. *)
let load_ctype tei =
  match tei with
  | F("type", Fs("type_def", [F("conjunction", Symbol value)])) -> parse_comp value
  | F("type", Fs("type_def", [F("conjunction", Symbol value); Fset("constraints", set)])) ->
    begin match parse_comp value with
      | CompTypeUndef, _ -> failwith "load_ctype"
      | ctype, [] -> ctype, List.rev (Xlist.rev_map set load_type_constrains)
      | _ -> failwith "load_ctype"
    end
  | F("type", Fs("type_def", l)) ->
    failwith ("load_ctype 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string))
  | _ -> failwith ("load_ctype:\n " ^ tei_to_string tei)

(** Extracts the text of a [TEIstring] element of a ["lemmas"] set; any other
    node fails with ["load_lemmas_set:..."]. *)
let load_lemmas_set tei =
  match tei with
  | TEIstring text -> text
  | _ -> failwith ("load_lemmas_set:\n " ^ tei_to_string tei)

(** Turns a lemma string into a lexeme: a string without parentheses becomes
    [Lexeme], the form [E(<gender>)] becomes [Elexeme]; any other use of
    parentheses fails with ["check_lemma"]. *)
let check_lemma s =
  let pieces = Str.full_split (Str.regexp "(\\|)") s in
  match pieces with
  | [Str.Text plain] -> Lexeme plain
  | [Str.Text "E"; Str.Delim "("; Str.Text g; Str.Delim ")"] -> Elexeme (parse_gender g)
  | _ -> failwith "check_lemma"

(** Builds the lemma of a lexicalisation from the triple
    (selection mode, cooccurrence, lemma strings).  A single lemma is used
    directly whatever the modes are; several lemmas are combined according to
    the mode pair.  Unsupported combinations fail with ["make_lemma"]. *)
let make_lemma (selection_mode, cooccurrence, lemmas) =
  match lemmas, selection_mode, cooccurrence with
  | [lemma], _, _ -> check_lemma lemma
  | _, "XOR", "concat" -> XOR (Xlist.map lemmas check_lemma)
  | _, "OR", "coord" -> ORcoord (Xlist.map lemmas check_lemma)
  | _, "OR", "concat" -> ORconcat (Xlist.map lemmas check_lemma)
  | _ -> failwith "make_lemma"

(** Expands a lexicalised phrase description
    (phrase, number, gender, degree, negation, reflexivity) into a list of
    (part of speech, lexeme) pairs for [lemma].  Combinations not listed are
    printed to standard output and yield the empty list. *)
let process_lex_phrase lemma features =
  let sie = QUB, Lexeme "się" in
  let prep_item prep case = PREP case, Lexeme prep in
  match features with
  | NP case, number, GenderUndef, GradUndef, NegationUndef, ReflUndef ->
    [SUBST(number, case), lemma]
  | PrepNP(prep, case), number, GenderUndef, GradUndef, NegationUndef, ReflUndef ->
    [prep_item prep case; SUBST(number, case), lemma]
  | AdjP case, number, gender, grad, NegationUndef, ReflUndef ->
    [ADJ(number, case, gender, grad), lemma]
  | PrepAdjP(prep, case), number, gender, grad, NegationUndef, ReflUndef ->
    [prep_item prep case; ADJ(number, case, gender, grad), lemma]
  (* for the next three phrase kinds only ReflTrue appends "się" *)
  | InfP aspect, NumberUndef, GenderUndef, GradUndef, negation, ReflTrue ->
    [INF(aspect, negation), lemma; sie]
  | InfP aspect, NumberUndef, GenderUndef, GradUndef, negation, _ ->
    [INF(aspect, negation), lemma]
  | PpasP case, number, gender, GradUndef, negation, ReflUndef ->
    [PPAS(number, case, gender, AspectUndef, negation), lemma]
  | PrepPpasP(prep, case), number, gender, GradUndef, negation, ReflUndef ->
    [prep_item prep case; PPAS(number, case, gender, AspectUndef, negation), lemma]
  | PactP case, number, gender, GradUndef, negation, ReflTrue ->
    [PACT(number, case, gender, AspectUndef, negation), lemma; sie]
  | PactP case, number, gender, GradUndef, negation, _ ->
    [PACT(number, case, gender, AspectUndef, negation), lemma]
  | PrepGerP(prep, case), number, GenderUndef, GradUndef, negation, ReflTrue ->
    [prep_item prep case; GER(number, case, GenderUndef, AspectUndef, negation), lemma; sie]
  | PrepGerP(prep, case), number, GenderUndef, GradUndef, negation, _ ->
    [prep_item prep case; GER(number, case, GenderUndef, AspectUndef, negation), lemma]
  | Qub, NumberUndef, GenderUndef, GradUndef, NegationUndef, ReflUndef ->
    [QUB, lemma]
  | AdvP _, NumberUndef, GenderUndef, grad, NegationUndef, ReflUndef ->
    [ADV grad, lemma]
  | phrase, number, gender, grad, negation, reflex ->
    (* unsupported combination: report it and produce nothing *)
    Printf.printf "%s %s %s %s %s %s\n"
      (ENIAMwalStringOf.phrase phrase)
      (ENIAMwalStringOf.number number)
      (ENIAMwalStringOf.gender gender)
      (ENIAMwalStringOf.grad grad)
      (ENIAMwalStringOf.negation negation)
      (ENIAMwalStringOf.refl reflex);
    []

(** Builds a position record with grammatical function [r], control lists
    [cr] / [ce] and realisations [morfs]; the id is [-1] and every other
    field is empty. *)
let new_schema r cr ce morfs =
  {psn_id = -1;
   gf = r;
   role = "";
   role_attr = "";
   mode = [];
   sel_prefs = [];
   cr;
   ce;
   morfs}
Wojciech Jaworski authored
244
245

(** Turns a loaded lexicalisation [lex] into a [LexPhrase], dispatching on the
    tuple (argument, arguments, lemma, numeral lemma).  Combinations not
    handled below fail with ["process_lex: ..."]. *)
let rec process_lex lex spec =
  (* finite clause: complementizer, personal verb form and, when the
     lexicalisation is reflexive, a trailing "się" *)
  let clause ctype comp_lex lemma =
    let core = [COMP ctype, comp_lex; PERS(lex.lex_negation), lemma] in
    let items = if lex.lex_reflex = ReflTrue then core @ [QUB, Lexeme "się"] else core in
    LexPhrase(items, lex.lex_modification) in
  match spec with
  | Phrase(ComparP prep), arguments, Lexeme "", Lexeme "" ->
    let positions = Xlist.map arguments (fun morf -> new_schema ARG [] [] [morf]) in
    LexPhrase([COMPAR, Lexeme prep], (Ratrs, positions))
  | PhraseAbbr(Xp mode, [argument]), _, _, _ ->
    (* unwrap xp: record its mode and process the inner argument instead *)
    let unwrapped = {lex with lex_argument = argument; lex_mode = mode :: lex.lex_mode} in
    process_lex unwrapped
      (argument, unwrapped.lex_arguments, unwrapped.lex_lemma, unwrapped.lex_numeral_lemma)
  (* | PhraseAbbr(Advp mode,[]),[],lemma,Lexeme ""  ->
    let poss = process_lex_phrase lemma (AdvP,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in
    LexPhrase(poss,lex.lex_modification) *)
  | Phrase(NumP case), [], lemma, num_lemma ->
    LexPhrase([NUM(case, GenderUndef), num_lemma;
               SUBST(NumberUndef, CaseUndef), lemma],
              lex.lex_modification)
  | Phrase(PrepNumP(prep, case)), [], lemma, num_lemma ->
    LexPhrase([PREP case, Lexeme prep;
               NUM(case, GenderUndef), num_lemma;
               SUBST(NumberUndef, CaseUndef), lemma],
              lex.lex_modification)
  | PhraseComp(Cp, (ctype, [Comp comp])), [], lemma, Lexeme "" ->
    clause ctype (Lexeme comp) lemma
  | PhraseComp(Cp, (ctype, [Comp comp1; Comp comp2])), [], lemma, Lexeme "" ->
    clause ctype (XOR[Lexeme comp1; Lexeme comp2]) lemma
  | Phrase phrase, [], lemma, Lexeme "" ->
    let poss =
      process_lex_phrase lemma
        (phrase, lex.lex_number, lex.lex_gender, lex.lex_degree, lex.lex_negation, lex.lex_reflex) in
    LexPhrase(poss, lex.lex_modification)
  | argument, arguments, lemma, numeral_lemma ->
    let description =
      Printf.sprintf "%s [%s] %s %s\n"
        (ENIAMwalStringOf.morf argument)
        (String.concat ";" (Xlist.map arguments ENIAMwalStringOf.morf))
        (ENIAMwalStringOf.lex lemma)
        (ENIAMwalStringOf.lex numeral_lemma) in
    failwith ("process_lex: " ^ description)
Wojciech Jaworski authored
271
272
273
274
(* NOTE: refl_id may change with the version of Walenty *)
let refl_id = 25

(** Position with role ["Lemma"] and mode ["lemma"] whose only realisation is
    the phrase referenced by [refl_id]. *)
let refl_position =
  {empty_position with
   morfs = [MorfId refl_id];
   mode = ["lemma"];
   role = "Lemma"}
Wojciech Jaworski authored
275
276
277
278
279
(** Reads a ["category"] feature holding a ["category_def"] structure and
    returns the category name together with its constraint phrases (empty
    when there is no ["constraints"] set). *)
let rec load_category tei =
  match tei with
  | F("category", Fs("category_def", [F("name", Symbol value)])) -> value, []
  | F("category", Fs("category_def", [F("name", Symbol value); Fset("constraints", set)])) ->
    (* modes noted while loading the constraints go to a throw-away ref *)
    value, List.rev (Xlist.rev_map set (load_phrase (ref [])))
  | F("category", Fs("category_def", l)) ->
    failwith ("load_category 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string))
  | _ -> failwith ("load_category:\n " ^ tei_to_string tei)

(* Helper for load_lex: reads the contents of a "modification_def" structure
   and returns the restriction kind with the list of modifier positions
   (empty when no "positions" set is present).  Any other shape prints its
   first element, if there is one, and fails with "load_modification_def:\n".
   The empty list is handled explicitly so the intended failure is raised
   instead of the [Failure "hd"] that [List.hd] would produce. *)
and load_modification_def = function
  | [F("type",Symbol value)] -> parse_restr value, []
  | [F("type",Symbol value); Fset("positions",set)] ->
    parse_restr value, List.rev (Xlist.rev_map set (load_position (-1) (-1) (ref IntMap.empty)))
  | x ->
    (match x with
     | first :: _ -> Printf.printf "%s\n" (tei_to_string first)
     | [] -> ());
    failwith "load_modification_def:\n"
Wojciech Jaworski authored
291
292
293
294
295
(* Folds one child of a "lex" structure into the lexicalisation record [arg].
   Unrecognised children are printed and then fail with "load_lex:\n ". *)
and load_lex arg xml =
  (* shared by "lemma" and "numeral_lemma" *)
  let lemma_of selection cooccurrence lemmas =
    make_lemma (selection, cooccurrence, List.rev (Xlist.rev_map lemmas load_lemmas_set)) in
  match xml with
  | F("argument", set) ->
    let noted = ref [] in
    let argument = load_phrase noted set in
    {arg with lex_argument = argument; lex_mode = !noted}
  | Fset("arguments", set) ->
    {arg with lex_arguments = List.rev (Xlist.rev_map set (load_phrase (ref [])))}
  | F("modification", Fs("modification_def", x)) ->
    {arg with lex_modification = load_modification_def x}
  | F("lemma", Fs("lemma_def", [F("selection_mode", Symbol value1);
                                F("cooccurrence", Symbol value2);
                                Fset("lemmas", lemmas)])) ->
    {arg with lex_lemma = lemma_of value1 value2 lemmas}
  | F("numeral_lemma", Fs("numeral_lemma_def", [F("selection_mode", Symbol value1);
                                                F("cooccurrence", Symbol value2);
                                                Fset("lemmas", lemmas)])) ->
    {arg with lex_numeral_lemma = lemma_of value1 value2 lemmas}
  | F("negation", Symbol value) -> {arg with lex_negation = parse_negation value}
  | F("degree", Symbol value) -> {arg with lex_degree = parse_grad value}
  | F("number", Symbol value) -> {arg with lex_number = parse_number value}
  | F("reflex", Binary flag) ->
    {arg with lex_reflex = if flag then ReflTrue else ReflFalse}
  | Fset("reflex", []) -> {arg with lex_reflex = ReflEmpty}
  | Fset("gender", [Symbol value]) -> {arg with lex_gender = parse_gender value}
  | _ ->
    Printf.printf "%s\n" (tei_to_string xml);
    failwith "load_lex:\n "
Wojciech Jaworski authored
318
(* Converts one phrase structure into a morf value.  Some phrase kinds also
   prepend a tag to the [mode] reference.  Unknown structures fail with
   "load_phrase match:...". *)
and load_phrase mode tei =
  let note tag = mode := tag :: !mode in
  match tei with
  | Fs("np", [F("case", Symbol a)]) -> Phrase (NP (parse_case a))
  | Fs("prepnp", [F("preposition", Symbol a); F("case", Symbol b)]) ->
    Phrase (PrepNP(a, parse_case b))
  | Fs("adjp", [F("case", Symbol a)]) -> Phrase (AdjP (parse_case a))
  | Fs("prepadjp", [F("preposition", Symbol a); F("case", Symbol b)]) ->
    Phrase (PrepAdjP(a, parse_case b))
  (* an optional leading element is accepted and ignored *)
  | Fs("comprepnp", ([_; F("complex_preposition", TEIstring a)]
                    | [F("complex_preposition", TEIstring a)])) ->
    Phrase (ComprepNP a)
  | Fs("cp", [a]) -> PhraseComp(Cp, load_ctype a)
  | Fs("ncp", [F("case", Symbol a); b]) -> PhraseComp(Ncp (parse_case a), load_ctype b)
  | Fs("prepncp", [F("preposition", Symbol a); F("case", Symbol b); c]) ->
    PhraseComp(Prepncp(a, parse_case b), load_ctype c)
  | Fs("infp", [F("aspect", Symbol a)]) -> Phrase (InfP (parse_aspect a))
  | Fs("xp", ([a] | [_; a])) ->
    let x, y = load_category a in
    note x;
    PhraseAbbr(Xp x, y)
  | Fs("advp", ([F("category", Symbol a)] | [_; F("category", Symbol a)])) ->
    note a;
    Phrase (AdvP a)
  | Fs("nonch", []) -> note "nonch"; PhraseAbbr(Nonch, [])
  | Fs("or", []) -> Phrase Or
  | Fs("refl", []) -> note "refl"; LexPhrase([QUB, Lexeme "się"], (Natr, []))
  | Fs("E", []) -> E Null
  | Fs("lex", x) ->
    let lex = Xlist.fold x empty_lex load_lex in
    mode := lex.lex_mode @ !mode;
    process_lex lex (lex.lex_argument, lex.lex_arguments, lex.lex_lemma, lex.lex_numeral_lemma)
  | Fs("fixed", [F("argument", _); F("string", TEIstring b)]) ->
    Phrase (FixedP((*snd (load_phrase a),*)b))
  | Fs("possp", ([_] | [])) -> note "possp"; PhraseAbbr(Possp, [])
  | Fs("recip", []) -> note "recip"; LexPhrase([QUB, Lexeme "się"], (Natr, []))
  | Fs("distrp", ([_] | [])) -> note "distrp"; PhraseAbbr(Distrp, [])
  | Fs("compar", [F("compar_category", Symbol value)]) -> Phrase (ComparP value)
  | Fs("gerp", [F("case", Symbol a)]) -> Phrase (GerP (parse_case a))
  | Fs("prepgerp", [F("preposition", Symbol a); F("case", Symbol b)]) ->
    Phrase (PrepGerP(a, parse_case b))
  | Fs("nump", [F("case", Symbol a)]) -> Phrase (NumP (parse_case a))
  | Fs("prepnump", [F("preposition", Symbol a); F("case", Symbol b)]) ->
    Phrase (PrepNumP(a, parse_case b))
  | Fs("ppasp", [F("case", Symbol a)]) -> Phrase (PpasP (parse_case a))
  | Fs("prepppasp", [F("preposition", Symbol a); F("case", Symbol b)]) ->
    Phrase (PrepPpasP(a, parse_case b))
  | Fs("qub", []) -> Phrase Qub
  | Fs("pactp", [F("case", Symbol a)]) -> Phrase (PactP (parse_case a))
  | Fs("adverb", [F("adverb", Symbol s)]) ->
    LexPhrase([ADV (Grad "pos"), Lexeme s], (Natr, []))
  | _ -> failwith ("load_phrase match:\n " ^ tei_to_string tei)
Wojciech Jaworski authored
359
(* Loads a phrase of position [psn] in schema [sch] of entry [ent].  A phrase
   carrying a "phr" id is registered in the [phrases] map under that id (an
   already present, different phrase under the same id is an error) and
   returned as [MorfId]; a phrase without such an id is loaded directly. *)
and load_phrase_id ent sch psn phrases mode = function
  | Fs(kind, Id{hash=false; numbers=[ent_id; sch_id; psn_id; id]; suffix="phr"} :: rest) ->
    (* the id must belong to the entry / schema / position being loaded *)
    if ent_id <> ent || sch_id <> sch || psn_id <> psn then
      failwith (Printf.sprintf "load_phrase %d %d" ent ent_id)
    else begin
      let morf = load_phrase mode (Fs(kind, rest)) in
      let merge previous =
        if morf = previous then morf else failwith "load_phrase_id" in
      phrases := IntMap.add_inc (!phrases) id morf merge;
      MorfId id
    end
  | Fs _ as tei -> load_phrase mode tei
  | _ -> failwith "load_phrase_id"
Wojciech Jaworski authored
368
369
370
371
372
373


(* Applies one element of a "control" set to the position [arg]; only
   [Symbol] nodes are accepted. *)
and load_control arg tei =
  match tei with
  | Symbol value -> parse_control arg value
  | _ -> failwith ("load_control:\n " ^ tei_to_string tei)
Wojciech Jaworski authored
374
(* Folds one child of a "position" structure into the position record [arg].
   The "phrases" arm reads [arg.psn_id], so it uses whatever id has been
   folded in before it. *)
and load_position_info ent sch phrases arg tei =
  match tei with
  | F("function", Symbol value) -> {arg with gf = parse_gf value}
  | Fset("phrases", phrases_set) ->
    let noted = ref [] in
    let morfs =
      List.rev (Xlist.rev_map phrases_set (load_phrase_id ent sch arg.psn_id phrases noted)) in
    (* round trip through a string set *)
    let modes = StringSet.to_list (StringSet.of_list (!noted)) in
    {arg with morfs = morfs; mode = modes}
  | Fset("control", control_set) -> Xlist.fold control_set arg load_control
  | Id{hash=false; numbers=[ent_id; sch_id; id]; suffix="psn"} ->
    if ent_id <> ent || sch_id <> sch then
      failwith (Printf.sprintf "load_position_info %d %d" ent ent_id)
    else {arg with psn_id = id}
  | _ -> failwith ("load_position_info:\n " ^ tei_to_string tei)
Wojciech Jaworski authored
386
(* Loads a "position" structure by folding its children into
   [empty_position]. *)
and load_position ent sch phrases tei =
  match tei with
  | Fs("position", children) ->
    Xlist.fold children empty_position (load_position_info ent sch phrases)
  | _ -> failwith ("load_position:\n " ^ tei_to_string tei)

(** Maps an opinion symbol (short code or Polish word) to its constructor;
    unknown symbols fail with ["parse_opinion: ..."]. *)
let parse_opinion s =
  match s with
  | "cer" | "dobry" -> Pewny
  | "col" -> Potoczny
  | "unc" | "wątpliwy" -> Watpliwy
  | "dat" -> Archaiczny
  | "bad" | "zły" -> Zly
  | "vul" -> Wulgarny
  | "unk" -> Nieokreslony
  | "met" -> Metaforyczny
  | "dom" -> Dziedzinowy
  | "rar" -> Sporadyczny
  | _ -> failwith ("parse_opinion: " ^ s)
Wojciech Jaworski authored
407
(** Folds one child of a ["schema"] structure into the schema record [arg].
    The ["positions"] arm reads [arg.sch_id], so it uses whatever id has been
    folded in before it. *)
let load_schema_info ent phrases (arg:schema) tei =
  match tei with
  | F("opinion", Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}
  | F("inherent_sie", Binary b) -> {arg with reflexiveMark = b}
  | F("aspect", Symbol aspect_value) -> {arg with aspect = parse_aspect aspect_value}
  | F("negativity", Symbol negativity_value) ->
    {arg with negativity = parse_negation negativity_value}
  (* empty aspect / negativity sets leave the record untouched *)
  | Fset("aspect", []) | Fset("negativity", []) -> arg
  | F("predicativity", Binary flag) ->
    {arg with predicativity = if flag then PredTrue else PredFalse}
  | Fset("positions", positions) ->
    {arg with positions = List.rev (Xlist.rev_map positions (load_position ent arg.sch_id phrases))}
  | F("text_rep", TEIstring text_rep) -> {arg with text_rep = text_rep}
  | Id{hash=false; numbers=[ent_id; id]; suffix="sch"} ->
    if ent_id <> ent then failwith (Printf.sprintf "load_schema_info %d %d" ent ent_id)
    else {arg with sch_id = id}
  | _ -> failwith ("load_schema_info\n " ^ tei_to_string tei)
Wojciech Jaworski authored
422
(** Loads a ["schema"] structure by folding its children into a schema record
    that starts with id [-1] and undefined / empty fields. *)
let load_schema ent phrases tei =
  match tei with
  | Fs("schema", children) ->
    let blank =
      {sch_id = (-1); opinion = OpinionUndef; reflexiveMark = false; aspect = AspectUndef;
       negativity = NegationUndef; predicativity = PredUndef; positions = []; text_rep = ""} in
    Xlist.fold children blank (load_schema_info ent phrases)
  | _ -> failwith ("load_schema:\n " ^ tei_to_string tei)
Wojciech Jaworski authored
430
431
432
433
(** Reads a ["phrase"] reference ([sameAs] with a leading ['#'] and a ["phr"]
    suffix) belonging to entry [ent] and returns the triple
    (schema id, position id, phrase id). *)
let load_phrases_set ent tei =
  match tei with
  | SameAs({hash=true; numbers=[ent_id; sch_id; psn_id; phr_id]; suffix="phr"}, "phrase") ->
    if ent_id = ent then sch_id, psn_id, phr_id
    else failwith (Printf.sprintf "load_phrases_set %d %d" ent ent_id)
  | _ -> failwith ("load_phrases_set :\n " ^ tei_to_string tei)
Wojciech Jaworski authored
436
(** Folds one child of an ["example"] structure into the example record
    [arg]; ids and references must belong to entry [ent]. *)
let load_example_info ent arg tei =
  match tei with
  | F("meaning", SameAs({hash=true; numbers=[ent_id; id]; suffix="mng"}, "lexical_unit")) ->
    if ent_id <> ent then failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)
    else {arg with meaning = id}
  | Fset("phrases", phrases_set) ->
    {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))}
  | F("sentence", TEIstring sentence_string) -> {arg with sentence = sentence_string}
  | F("source", Symbol source_value) -> {arg with source = source_value}
  | F("opinion", Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}
  | F("note", TEIstring note_string) -> {arg with note = note_string}
  | Id{hash=false; numbers=[ent_id; id]; suffix="exm"} ->
    if ent_id <> ent then failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)
    else {arg with exm_id = id}
  | _ -> failwith ("load_example_info: \n " ^ tei_to_string tei)
Wojciech Jaworski authored
447
Wojciech Jaworski authored
448
(** Loads an ["example"] structure by folding its children into an example
    record that starts with ids [-1] and empty / undefined fields. *)
let load_example ent tei =
  match tei with
  | Fs("example", example_elements) ->
    let blank =
      {exm_id = (-1); meaning = (-1); phrases = []; sentence = "";
       source = ""; opinion = OpinionUndef; note = ""} in
    Xlist.fold example_elements blank (load_example_info ent)
  | _ -> failwith ("load_example: \n " ^ tei_to_string tei)
Wojciech Jaworski authored
455
Wojciech Jaworski authored
456
457
458
(** Reads one element of the selectional-preference set called [name]:
    numbers are accepted only in ["synsets"], symbols only in ["predefs"],
    and relations only in ["relations"] when they point to an argument of
    frame [frm] in entry [ent]. *)
let load_self_prefs_sets name ent frm tei =
  match tei with
  | Numeric value ->
    if name <> "synsets" then failwith "load_self_prefs_sets" else SynsetId value
  | Symbol value ->
    if name <> "predefs" then failwith "load_self_prefs_sets" else Predef value
  | Fs("relation", [F("type", Symbol value);
                    F("to", SameAs({hash=true; numbers=[ent_id; frm_id; arg_id]; suffix="arg"}, "argument"))]) ->
    if ent_id = ent && frm_id = frm && name = "relations" then RelationArgId(value, arg_id)
    else failwith (Printf.sprintf "load_self_prefs_sets %d %d" ent ent_id)
  | _ -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string tei)
Wojciech Jaworski authored
463
Wojciech Jaworski authored
464
(** Loads every element of one named selectional-preference set; the set
    name is passed on to [load_self_prefs_sets]. *)
let load_argument_self_prefs ent frm tei =
  match tei with
  | Fset(name, self_prefs_set) ->
    List.rev (Xlist.rev_map self_prefs_set (load_self_prefs_sets name ent frm))
  | _ -> failwith ("load_argument_self_prefs: \n " ^ tei_to_string tei)
Wojciech Jaworski authored
468
Wojciech Jaworski authored
469
(** Folds one child of an ["argument"] structure into the argument record
    [arg]; the ["arg"] id must belong to frame [frm] of entry [ent]. *)
let load_argument_info ent frm arg tei =
  match tei with
  | F("role", Symbol value) -> {arg with role = value}
  | F("role_attribute", Symbol value) -> {arg with role_attribute = value}
  | F("sel_prefs", Fs("sel_prefs_groups", self_prefs)) ->
    let groups = List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent frm)) in
    {arg with sel_prefs = List.flatten groups}
  (* | Id id -> {arg with arg_id = id} *)
  | Id{hash=false; numbers=[ent_id; frm_id; id]; suffix="arg"} ->
    if ent_id <> ent || frm_id <> frm then
      failwith (Printf.sprintf "load_argument_info %d %d" ent ent_id)
    else {arg with arg_id = id}
  | _ -> failwith ("load_argument_info :\n " ^ tei_to_string tei)
Wojciech Jaworski authored
480
(** Loads an ["argument"] structure by folding its children into an argument
    record that starts with id [-1] and empty fields. *)
let load_arguments_set ent frm tei =
  match tei with
  | Fs("argument", info) ->
    let blank = {arg_id = (-1); role = ""; role_attribute = ""; sel_prefs = []} in
    Xlist.fold info blank (load_argument_info ent frm)
  | _ -> failwith ("load_arguments_set :\n " ^ tei_to_string tei)
Wojciech Jaworski authored
487
488
489
(** Reads a ["lexical_unit"] reference ([sameAs] with a leading ['#'] and a
    ["mng"] suffix) belonging to entry [ent] and returns the meaning id. *)
let load_meanings_set ent tei =
  match tei with
  | SameAs({hash=true; numbers=[ent_id; id]; suffix="mng"}, "lexical_unit") ->
    if ent_id <> ent then failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id)
    else id
  | _ -> failwith ("load_meanings_set :\n " ^ tei_to_string tei)
Wojciech Jaworski authored
492
(* Builds a frame record from a frame feature structure made of exactly four
   parts, in this order: the frame identifier, the opinion symbol, the set
   of meaning references and the set of arguments.
   The first number of the identifier has to equal [ent].
   Raises [Failure] on an entry mismatch or on any other node shape. *)
let load_frame ent tei =
  match tei with
  | Fs("frame",[
      Id{hash=false; numbers=[owner;frame_no]; suffix="frm"};
      F("opinion",Symbol opinion_sym);
      Fset("meanings",meaning_refs);
      Fset("arguments",argument_nodes)]) ->
    if owner = ent then
      {frm_id = frame_no;
       opinion = parse_opinion opinion_sym;
       meanings = List.rev (Xlist.rev_map meaning_refs (load_meanings_set ent));
       arguments = List.rev (Xlist.rev_map argument_nodes (load_arguments_set ent frame_no))}
    else failwith (Printf.sprintf "load_frame %d %d" ent owner)
  | other -> failwith ("load_frame :\n " ^ tei_to_string other)
Wojciech Jaworski authored
505
(* Folds a single feature of a lexical unit into the meaning record [arg].
   Handled features: name, variant, plwnluid, gloss and the identifier of the
   meaning, whose first number has to equal [ent].
   Raises [Failure] on an entry mismatch or on any other node. *)
let load_meaning_info ent arg tei =
  match tei with
  | F("name",TEIstring s) -> {arg with name = s}
  | F("variant",TEIstring s) -> {arg with variant = s}
  | F("plwnluid",Numeric n) -> {arg with plwnluid = n}
  | F("gloss",TEIstring s) -> {arg with gloss = s}
  | Id{hash=false; numbers=[owner;mng]; suffix="mng"} ->
    if owner <> ent then failwith (Printf.sprintf "load_meaning_info %d %d" ent owner)
    else {arg with mng_id = mng}
  | other -> failwith ("load_meaning_info:\n " ^ tei_to_string other)
Wojciech Jaworski authored
514
(* Builds a meaning record from a lexical_unit feature structure, starting
   from [empty_meaning] and folding [load_meaning_info ent] over the features.
   Raises [Failure] for any other node. *)
let load_meaning ent tei =
  match tei with
  | Fs("lexical_unit", features) ->
    let add_feature = load_meaning_info ent in
    Xlist.fold features empty_meaning add_feature
  | other -> failwith ("load_meaning:\n " ^ tei_to_string other)
Wojciech Jaworski authored
519
(* Reads one connection of an alternation: a reference to an argument plus a
   set of phrase references.
   Returns a triple: the single schema id shared by all phrase references,
   the frame number taken from the argument reference, and a record holding
   the argument number together with the phrase ids grouped by position id.
   The three ids of every phrase reference come from [load_phrases_set].
   Raises [Failure] when the argument reference belongs to another entry,
   when the phrase references do not all yield exactly one schema id,
   or when the node has a different shape. *)
let load_alter_connection ent = function
  | Fs("connection", [
      F("argument",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"},"argument"));
      Fset("phrases",phrases)]) ->
    if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else
    (* group phrase ids under their position id and collect every schema id seen;
       NOTE(review): add_inc presumably inserts the default for a new key and
       applies the function to an existing binding - confirm against Xstd *)
    let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase ->
      let sch_id,psn_id,phr_id = load_phrases_set ent phrase in
      IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l),
      IntSet.add sch_set sch_id) in
    if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else
    IntSet.min_elt sch_set, frm_id,
    {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)}
  (* the message now carries the real name of this function *)
  | xml -> failwith ("load_alter_connection: \n " ^ tei_to_string xml)
Wojciech Jaworski authored
533
(* Reads one alternation, i.e. a set of connections loaded with
   [load_alter_connection ent].
   All connections have to agree on a single schema id and a single frame id;
   the result records both ids and the connections in their input order.
   Raises [Failure] when the connections yield zero or several schema ids or
   frame ids, or when the node has a different shape. *)
let load_alternations ent = function
  | Fs("alternation",[Fset("connections",connections_set)]) ->
    let conns,sch_set,frm_set = Xlist.fold connections_set ([],IntSet.empty,IntSet.empty) (fun (conns,sch_set,frm_set) conn ->
      let sch_id,frm_id,conn = load_alter_connection ent conn in
      conn :: conns, IntSet.add sch_set sch_id, IntSet.add frm_set frm_id) in
    if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alternations: |sch_set|=%d" (IntSet.size sch_set)) else
    (* report the size of frm_set itself; sch_set always has size 1 here *)
    if IntSet.size frm_set <> 1 then failwith (Printf.sprintf "load_alternations: |frm_set|=%d" (IntSet.size frm_set)) else
    (* conns was built by consing, so reverse it to restore the input order *)
    {schema=IntSet.min_elt sch_set; frame=IntSet.min_elt frm_set; connections=List.rev conns}
  | xml -> failwith ("load_alternations: \n " ^ tei_to_string xml)
Wojciech Jaworski authored
543
(* Builds a dictionary entry from one entry element.
   The xml:id attribute has to parse to an unhashed id with a single number
   and suffix ent; that number becomes [ent_id].
   Every child is then folded into the entry: a form element sets the
   orthographic form and the part of speech, any other child is parsed with
   [parse_tei] and dispatched on the name of its layer.
   [phrases] is handed unchanged to [load_schema].
   Raises [Failure] for a malformed id, an unknown layer or a node that is
   not an entry element with a single xml:id attribute. *)
let load_entry phrases = function
  | Xml.Element("entry",["xml:id",xml_id], l) ->
    let id = match parse_id xml_id with
        {hash=false; numbers=[id]; suffix="ent"} -> id
      (* name this function and show the offending id *)
      | _ -> failwith ("load_entry: invalid id " ^ xml_id) in
    let entry = {empty_entry with ent_id = id} in
    Xlist.fold l entry (fun e -> function
          Xml.Element("form", [], [
            Xml.Element("orth",[],[Xml.PCData orth]);
            Xml.Element("pos",[],[Xml.PCData pos])]) -> {e with form_orth=orth; form_pos=pos}
        | xml -> (match parse_tei xml with
            | Fs("syntactic_layer", [Fset("schemata",schemata_set)]) -> {e with schemata = List.rev (Xlist.rev_map schemata_set (load_schema id phrases))}
            | Fs("examples_layer", [Fset("examples",examples_set)]) -> {e with examples = List.rev (Xlist.rev_map examples_set (load_example id))}
            | Fs("semantic_layer", [Fset("frames",frame_set)]) -> {e with frames = List.rev (Xlist.rev_map frame_set (load_frame id))}
            | Fs("meanings_layer", [Fset("meanings",meanings_set)]) -> {e with meanings = List.rev (Xlist.rev_map meanings_set (load_meaning id))}
            | Fs("connections_layer",[Fset("alternations",alternations)]) -> {e with alternations = List.rev (Xlist.rev_map alternations (load_alternations id))}
            | Fs("general_info",[F("status",TEIstring status)]) -> {e with status=status}
            | xml -> failwith ("load_entry: \n" ^ tei_to_string xml)))
  | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
Wojciech Jaworski authored
564
(* Parses the XML file [filename] and loads every entry found in the body of
   the TEI document with [load_entry].
   Returns the list of entries in document order paired with the final
   content of the reference that was passed to every [load_entry] call.
   Raises [Failure] when the document does not have the expected
   TEI / teiHeader / text / body layout. *)
let load_walenty filename =
  match Xml.parse_file filename with
  | Xml.Element("TEI", _,
                [Xml.Element("teiHeader",_,_);
                 Xml.Element("text",[],[Xml.Element("body",[],entry_nodes)])]) ->
    let phrase_table = ref IntMap.empty in
    let reversed = Xlist.rev_map entry_nodes (load_entry phrase_table) in
    let entries = List.rev reversed in
    (* read the reference only after all entries have been loaded *)
    let collected = !phrase_table in
    entries, collected
  | _ -> failwith "load_walenty"
Wojciech Jaworski authored
576
577
578
579
580
(* Rewrites a two-position expansion into a list of phrases.
   Only two shapes are accepted, both starting with an ARG position that has
   empty cr and ce lists and a single phrase:
   - a fixed phrase: yields one lexical phrase with the FIXED part of speech
     and the second position as its only Ratr dependent;
   - a lexical phrase for the lemma "własny" with no dependents, followed by
     a position holding exactly two phrases: yields those two phrases and the
     lexical phrase with the second position as its only Atr dependent.
   Raises [Failure] for every other input. *)
let correct_expansion positions =
  match positions with
  | [{gf=ARG; cr=[]; ce=[]; morfs=[Phrase(FixedP fixed)]}; dependent] ->
    [LexPhrase([FIXED,Lexeme fixed],(Ratr,[dependent]))]
  | [{gf=ARG; cr=[]; ce=[]; morfs=[LexPhrase([pos,Lexeme "własny"],(Natr,[]))]};
     {morfs=[first;second]} as dependent] ->
    [first; second; LexPhrase([pos,Lexeme "własny"],(Atr,[dependent]))]
  | _ -> failwith "correct_expansion"
Wojciech Jaworski authored
581
582

(* Reads one expansion structure; the opinion symbol is matched but not used.
   - with a one-element set of phrases: returns that phrase, loaded with
     [load_phrase], as a singleton list;
   - with a set of positions: loads each with [load_position] and passes the
     list, in input order, to [correct_expansion].
   Raises [Failure] for any other node. *)
let load_expansion tei =
  match tei with
  | Fs("expansion",[F("opinion",Symbol _);Fset("phrases",[single])]) ->
    [load_phrase (ref []) single]
  | Fs("expansion",[F("opinion",Symbol _);Fset("positions",position_nodes)]) ->
    (* one fresh reference shared by all positions of this expansion *)
    let load_one = load_position (-1) (-1) (ref IntMap.empty) in
    let positions = List.rev (Xlist.rev_map position_nodes load_one) in
    correct_expansion positions
  | other -> failwith ("load_expansion: \n" ^ tei_to_string other)

(* Reads one entry of the expansions file: an entry element with an xml:id
   attribute and exactly two children, a phrase and its expansions.
   The id has to parse to an unhashed id with a single number and suffix exp.
   Returns the triple (id, phrase, expansions), where the expansions come
   either from a set, loaded one by one with [load_expansion] and
   concatenated in input order, or from a single expansion.
   Raises [Failure] for a malformed id or any other node shape. *)
let load_rentry = function
  | Xml.Element("entry",["xml:id",xml_id], [phrase;exp]) ->
    let id = match parse_id xml_id with
        {hash=false; numbers=[id]; suffix="exp"} -> id
      (* name this function and show the offending id *)
      | _ -> failwith ("load_rentry: invalid id " ^ xml_id) in
    let morf = load_phrase (ref []) (parse_tei phrase) in
    let expansions = match parse_tei exp with
        | Fs("phrase_type_expansions", [Fset("expansions",expansions)]) -> List.flatten (List.rev (Xlist.rev_map expansions load_expansion))
        | Fs("phrase_type_expansions", [F("expansions",expansion)]) -> load_expansion expansion
        | tei -> failwith ("load_rentry: \n" ^ tei_to_string tei) in
    id,morf,expansions
  | xml -> failwith ("load_rentry: \n" ^ Xml.to_string_fmt xml)
Wojciech Jaworski authored
600
(* Hand-written expansion entries in the (id, phrase, expansions) layout;
   [load_expands] puts them in front of the entries read from the file.
   Every expansion is a lexical phrase with the Natr marker and no dependents. *)
let expands_supplement =
  let natr_lex pos lemma = LexPhrase([pos,Lexeme lemma],(Natr,[])) in
  let subst = SUBST(NumberUndef,Str) in
  let adv = ADV (Grad "pos") in
  [
    (-2), PhraseAbbr(Nonch,[]), List.map (natr_lex subst) ["co"; "coś"; "nic"; "to"];
    (-3), Phrase (AdvP "pron"), List.map (natr_lex adv) ["tak"; "jak"];
  ]
Wojciech Jaworski authored
611
Wojciech Jaworski authored
612
613
614
615
616
617
618
619
620
(* Parses the XML file [filename] and loads every entry found in the body of
   the TEI document with [load_rentry].
   Returns [expands_supplement] followed by the loaded entries in document
   order.
   Raises [Failure] when the document does not have the expected
   TEI / teiHeader / text / body layout. *)
let load_expands filename =
  match Xml.parse_file filename with
  | Xml.Element("TEI", _,
                [Xml.Element("teiHeader",_,_) ;
                 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
    expands_supplement @ List.rev (Xlist.rev_map entries load_rentry)
  (* the message now names this function instead of load_walenty *)
  | _ -> failwith "load_expands"
Wojciech Jaworski authored
621
622
623

(* Association list from a subtype tag to the lemmas listed for that tag.
   NOTE(review): the tags look like interrogative and relative - confirm
   against the code that consumes this table. *)
let subtypes =
  let int_lemmas = [
    "co"; "czemu"; "czy"; "czyj"; "dlaczego"; "dokąd";
    "gdzie"; "ile"; "jak"; "jaki"; "kiedy"; "kto";
    "którędy"; "który"; "odkąd"; "skąd"; "jakoby";
  ] in
  let rel_lemmas = [
    "co"; "dokąd"; "gdzie"; "jak"; "jakby"; "jaki"; "jakoby";
    "kiedy"; "kto"; "którędy"; "który"; "odkąd"; "skąd";
  ] in
  ["int", int_lemmas; "rel", rel_lemmas]
Wojciech Jaworski authored
629
Wojciech Jaworski authored
630
(* Association list pairing a word with the list of words given as its
   equivalents. *)
let equivs = [
  "jak", ["niczym"];
  "przeciw", ["przeciwko"];
]