Commit cdcea6621a46bb7412051ca37b9ef7c516a53ba0

Authored by Wojciech Jaworski
1 parent 6c86906e

scalenie zdań i akapitów w NKJP1M

Showing 1 changed file with 36 additions and 614 deletions
NKJP2/ENIAM_NKJP.ml
@@ -199,10 +199,16 @@ let rec merge_tokens name id_p rev = function @@ -199,10 +199,16 @@ let rec merge_tokens name id_p rev = function
199 let id_div,id_ab,beg,len = parse_seg_corresp corresp in( 199 let id_div,id_ab,beg,len = parse_seg_corresp corresp in(
200 (* if id_div <> id_p then (*failwith*)print_endline (Printf.sprintf "merge_tokens 4: %s %d %s" name id_p corresp); (*else*) *) 200 (* if id_div <> id_p then (*failwith*)print_endline (Printf.sprintf "merge_tokens 4: %s %d %s" name id_p corresp); (*else*) *)
201 let lemma,cat,interp = parse_disamb disamb in 201 let lemma,cat,interp = parse_disamb disamb in
202 - merge_tokens name id_p ((id_div,id_ab,beg,nps,len,orth,lemma,cat,interp) :: rev) (segmentation,morphosyntax)) 202 + merge_tokens name id_p ((id_div,id_ab,(beg,len,nps,orth,lemma,cat,interp)) :: rev) (segmentation,morphosyntax))
203 | [],[] -> List.rev rev 203 | [],[] -> List.rev rev
204 | _ -> failwith "merge_tokens 1" 204 | _ -> failwith "merge_tokens 1"
205 205
  206 +let rec split_sentences id_div id_ab rev rev2 = function
  207 + (id_div2,id_ab2,token) :: l ->
  208 + if id_div = id_div2 && id_ab = id_ab2 then split_sentences id_div id_ab (token :: rev) rev2 l else
  209 + split_sentences id_div2 id_ab2 [token] ((id_div,id_ab,List.rev rev) :: rev2) l
  210 + | [] -> List.rev ((id_div,id_ab,List.rev rev) :: rev2)
  211 +
206 let rec merge_sentences name id_p rev = function 212 let rec merge_sentences name id_p rev = function
207 ({corref=""; prefix="segm"; numbers=[id_segm_p;id_segm_s]; suffix="s"},segm_tokens) :: segmentation, 213 ({corref=""; prefix="segm"; numbers=[id_segm_p;id_segm_s]; suffix="s"},segm_tokens) :: segmentation,
208 ({corref="ann_segmentation.xml"; prefix="segm"; numbers=[c_segm_p;c_segm_s]; suffix="s"}, 214 ({corref="ann_segmentation.xml"; prefix="segm"; numbers=[c_segm_p;c_segm_s]; suffix="s"},
@@ -211,10 +217,34 @@ let rec merge_sentences name id_p rev = function @@ -211,10 +217,34 @@ let rec merge_sentences name id_p rev = function
211 if id_segm_p <> c_segm_p || id_segm_p <> id_morph_p then failwith "merge_sentences 2" else 217 if id_segm_p <> c_segm_p || id_segm_p <> id_morph_p then failwith "merge_sentences 2" else
212 if id_segm_s <> c_segm_s || c_segm_s <> id_morph_s then failwith "merge_sentences 3" else 218 if id_segm_s <> c_segm_s || c_segm_s <> id_morph_s then failwith "merge_sentences 3" else
213 let tokens = merge_tokens name id_p [] (segm_tokens,morph_tokens) in 219 let tokens = merge_tokens name id_p [] (segm_tokens,morph_tokens) in
214 - merge_sentences name id_p ((id_segm_p,id_segm_s,tokens) :: rev) (segmentation,morphosyntax) 220 + let id_s = string_of_int id_segm_p ^ "." ^ string_of_int id_segm_s in
  221 + if tokens = [] then failwith "merge_sentences 4" else
  222 + let id_div,id_ab,token = List.hd tokens in
  223 + let l = match split_sentences id_div id_ab [token] [] tokens with
  224 + [id_div,id_ab,tokens] -> [id_div,id_ab,id_s,tokens]
  225 + | [id_div1,id_ab1,tokens1;id_div2,id_ab2,tokens2] -> [id_div2,id_ab2,id_s^"b",tokens2;id_div1,id_ab1,id_s^"a",tokens1]
  226 + | [id_div1,id_ab1,tokens1;id_div2,id_ab2,tokens2;id_div3,id_ab3,tokens3] -> [id_div3,id_ab3,id_s^"c",tokens3;id_div2,id_ab2,id_s^"b",tokens2;id_div1,id_ab1,id_s^"a",tokens1]
  227 + | _ -> failwith (Printf.sprintf "merge_sentences 5: %s %d %d" name id_div id_ab) in
  228 + merge_sentences name id_p (l @ rev) (segmentation,morphosyntax)
215 | [],[] -> List.rev rev 229 | [],[] -> List.rev rev
216 | _ -> failwith "merge_sentences" 230 | _ -> failwith "merge_sentences"
217 231
  232 +let rec merge_paragraph id_div id_ab rev = function
  233 + (id_div2,id_ab2,id_s,tokens) :: sentences ->
  234 + if id_div <> id_div2 || id_ab <> id_ab2 then List.rev rev, (id_div2,id_ab2,id_s,tokens) :: sentences
  235 + else merge_paragraph id_div id_ab ((id_s,tokens) :: rev) sentences
  236 + | [] -> List.rev rev, []
  237 +
  238 +let rec merge_paragraphs name id_p rev = function
  239 + ({corref=""; prefix="txt"; numbers=[id_div;id_ab]; suffix="ab"},paragraph) :: paragraphs,
  240 + (id_div2,id_ab2,id_s,tokens) :: sentences ->
  241 + if id_div <> id_div2 && id_ab <> id_ab2 then failwith "merge_paragraphs 1" else
  242 + let l,sentences = merge_paragraph id_div id_ab [id_s,tokens] sentences in
  243 + (* Printf.printf "%d.%d: %s\n" id_div id_ab (String.concat " " (Xlist.map l fst)); *)
  244 + merge_paragraphs name id_p ((paragraph,l) :: rev) (paragraphs,sentences)
  245 + | [],[] -> List.rev rev
  246 + | _ -> failwith ("merge_paragraphs 2: " ^ name ^ " " ^ string_of_int id_p)
  247 +
218 let rec merge_entries name rev = function 248 let rec merge_entries name rev = function
219 ({corref=""; prefix="txt"; numbers=[id_div]; suffix="div"},paragraphs) :: text, 249 ({corref=""; prefix="txt"; numbers=[id_div]; suffix="div"},paragraphs) :: text,
220 ({corref="text.xml"; prefix="txt"; numbers=[c_div]; suffix="div"}, 250 ({corref="text.xml"; prefix="txt"; numbers=[c_div]; suffix="div"},
@@ -223,7 +253,8 @@ let rec merge_entries name rev = function @@ -223,7 +253,8 @@ let rec merge_entries name rev = function
223 {corref=""; prefix="morph"; numbers=[id_morph_p]; suffix="p"},morph_sentences) :: morphosyntax -> 253 {corref=""; prefix="morph"; numbers=[id_morph_p]; suffix="p"},morph_sentences) :: morphosyntax ->
224 if id_div <> c_div || c_div <> id_segm_p || id_segm_p <> c_segm_p || c_segm_p <> id_morph_p then failwith "merge_entries 2" else 254 if id_div <> c_div || c_div <> id_segm_p || id_segm_p <> c_segm_p || c_segm_p <> id_morph_p then failwith "merge_entries 2" else
225 let sentences = merge_sentences name id_div [] (segm_sentences,morph_sentences) in 255 let sentences = merge_sentences name id_div [] (segm_sentences,morph_sentences) in
226 - merge_entries name ((id_div,paragraphs,sentences) :: rev) (text,segmentation,morphosyntax) 256 + let paragraphs = merge_paragraphs name id_div [] (paragraphs,sentences) in
  257 + merge_entries name ((id_div,paragraphs) :: rev) (text,segmentation,morphosyntax)
227 | [],[],[] -> List.rev rev 258 | [],[],[] -> List.rev rev
228 | _ -> failwith "merge_entries" 259 | _ -> failwith "merge_entries"
229 260
@@ -232,6 +263,7 @@ let nkjp_path = "../../NLP resources/NKJP-PodkorpusMilionowy-1.2/" @@ -232,6 +263,7 @@ let nkjp_path = "../../NLP resources/NKJP-PodkorpusMilionowy-1.2/"
232 let _ = 263 let _ =
233 let names = get_folders nkjp_path in 264 let names = get_folders nkjp_path in
234 Xlist.iter names (fun name -> 265 Xlist.iter names (fun name ->
  266 + if name = "030-2-000000012" then () else
235 (* print_endline name; *) 267 (* print_endline name; *)
236 let typ,channel = load_header nkjp_path name in 268 let typ,channel = load_header nkjp_path name in
237 (* print_endline typ; *) 269 (* print_endline typ; *)
@@ -271,7 +303,7 @@ frekwencje kanałów @@ -271,7 +303,7 @@ frekwencje kanałów
271 28 prasa_tygodnik 303 28 prasa_tygodnik
272 304
273 frekwencje łączne typów-kanałów 305 frekwencje łączne typów-kanałów
274 - 127 fakt ksiazka 306 + 127 fakt ksiazka
275 56 inf-por ksiazka 307 56 inf-por ksiazka
276 283 konwers mowiony 308 283 konwers mowiony
277 2 listy ksiazka 309 2 listy ksiazka
@@ -292,613 +324,3 @@ frekwencje łączne typów-kanałów @@ -292,613 +324,3 @@ frekwencje łączne typów-kanałów
292 387 urzed prasa_inne 324 387 urzed prasa_inne
293 325
294 *) 326 *)
295 -(*  
296 -  
297 -type id = {hash: bool; suffix: string; numbers: int list}  
298 -  
299 -let empty_id = {hash = false; suffix = ""; numbers = []}  
300 -  
301 -let parse_id s =  
302 - if String.length s = 0 then empty_id else  
303 - if String.length s < 6 then failwith "za krótkie id" else  
304 - let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in  
305 - if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else  
306 - let s = String.sub s 4 (String.length s - 4) in  
307 - let s,suf = match Str.split (Str.regexp "-") s with  
308 - [s;suf] -> s,suf  
309 - | _ -> failwith ("parse_id: zła ilość '-' " ^ s) in  
310 - let id = {hash = hash; suffix = suf; numbers = try Xlist.map (Xstring.split "\\." s) int_of_string with _ -> failwith ("parse_id: " ^ s)} in  
311 - id  
312 -  
313 -let string_of_id id =  
314 - (if id.hash then "#" else "") ^ "wal_" ^ (String.concat "." (Xlist.map id.numbers string_of_int)) ^ "-" ^ id.suffix  
315 -  
316 -type tei =  
317 - Symbol of string  
318 - | TEIstring of string  
319 - | Binary of bool  
320 - | Numeric of int  
321 - | F of string * tei  
322 - | Fset of string * tei list  
323 - | Fs of string * tei list  
324 - | Id of id  
325 - | SameAs of id * string  
326 -  
327 -let rec tei_to_string = function  
328 - Symbol s -> Printf.sprintf "Symbol %s" s  
329 - | TEIstring s -> Printf.sprintf "String %s" s  
330 - | Binary b -> Printf.sprintf "Binary %s" (string_of_bool b)  
331 - | Numeric n -> Printf.sprintf "Numeric %d" n  
332 - | F(s,t) -> Printf.sprintf "F(%s,%s)" s (tei_to_string t)  
333 - | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))  
334 - | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))  
335 - | Id id -> Printf.sprintf "Id(%s)" (string_of_id id)  
336 - | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s  
337 -  
338 -let rec parse_tei = function  
339 - Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) ->  
340 - Fset(name,List.rev (Xlist.map set parse_tei))  
341 - | Xml.Element("f", ["name",name],[]) -> Fset(name,[])  
342 - | Xml.Element("f", ["name",name],[tei]) -> F(name,parse_tei tei)  
343 - | Xml.Element("f", ["name",name],set) -> Fset(name,List.rev (Xlist.map set parse_tei))  
344 - | Xml.Element("fs", ["type",name], l) -> Fs(name,List.rev (Xlist.rev_map l parse_tei))  
345 - | Xml.Element("fs", ["xml:id",id;"type",name], l) -> Fs(name,Id(parse_id id) :: List.rev (Xlist.rev_map l parse_tei))  
346 - | Xml.Element("symbol",["value",value],[]) -> Symbol value  
347 - | Xml.Element("string",[], [Xml.PCData s]) -> TEIstring s  
348 - | Xml.Element("string",[], []) -> TEIstring ""  
349 - | Xml.Element("binary",["value",value],[]) -> Binary(try bool_of_string value with _ -> failwith "parse_tei")  
350 - | Xml.Element("numeric",["value",value],[]) -> Numeric(try int_of_string value with _ -> failwith "parse_tei")  
351 - | Xml.Element("fs", ["sameAs", same_as; "type",name], []) -> SameAs(parse_id same_as,name)  
352 - | Xml.Element("fs", ["sameAs", same_as], []) -> SameAs(parse_id same_as,"")  
353 - | xml -> failwith ("parse_tei: " ^ Xml.to_string_fmt xml)  
354 -  
355 -let parse_gf = function  
356 - "subj" -> SUBJ  
357 - | "obj" -> OBJ  
358 - | s -> failwith ("parse_gf: " ^ s)  
359 -  
360 -let parse_control arg = function  
361 - "controller" -> {arg with cr="1" :: arg.cr}  
362 - | "controllee" -> {arg with ce="1" :: arg.cr}  
363 - | "controller2" -> {arg with cr="2" :: arg.cr}  
364 - | "controllee2" -> {arg with ce="2" :: arg.cr}  
365 - | s -> failwith ("parse_control: " ^ s)  
366 -  
367 -let parse_case = function  
368 - "nom" -> Case "nom"  
369 - | "gen" -> Case "gen"  
370 - | "dat" -> Case "dat"  
371 - | "acc" -> Case "acc"  
372 - | "inst" -> Case "inst"  
373 - | "loc" -> Case "loc"  
374 - | "str" -> Str  
375 - | "pred" -> Case "pred"  
376 - | "part" -> Part  
377 - | "postp" -> Case "postp"  
378 - | "agr" -> CaseAgr  
379 - | s -> failwith ("parse_case: " ^ s)  
380 -  
381 -let parse_aspect = function  
382 - "perf" -> Aspect "perf"  
383 - | "imperf" -> Aspect "imperf"  
384 - | "_" -> AspectUndef  
385 - | "" -> AspectNA  
386 - | s -> failwith ("parse_aspect: " ^ s)  
387 -  
388 -let parse_negation = function  
389 - "_" -> NegationUndef  
390 - | "neg" -> Negation  
391 - | "aff" -> Aff  
392 - | "" -> NegationNA  
393 - | s -> failwith ("parse_negation: " ^ s)  
394 -  
395 -let parse_number = function  
396 - "sg" -> Number "sg"  
397 - | "pl" -> Number "pl"  
398 - | "agr" -> NumberAgr  
399 - | "_" -> NumberUndef  
400 - | s -> failwith ("parse_number: " ^ s)  
401 -  
402 -let parse_gender = function  
403 - "m1" -> Gender "m1"  
404 - | "m3" -> Gender "m3"  
405 - | "n" -> Genders["n1";"n2"]  
406 - | "f" -> Gender "f"  
407 - | "m1.n" -> Genders["m1";"n1";"n2"]  
408 - | "_" -> GenderUndef  
409 - | "agr" -> GenderAgr  
410 - | s -> failwith ("parse_gender: " ^ s)  
411 -  
412 -let parse_grad = function  
413 - "pos" -> Grad "pos"  
414 - | "com" -> Grad "com"  
415 - | "sup" -> Grad "sup"  
416 - | "_" -> GradUndef  
417 - | s -> failwith ("parse_grad: " ^ s)  
418 -  
419 -let rec parse_restr = function  
420 - "natr" -> Natr  
421 - | "atr" -> Atr  
422 - | "ratr" -> Ratr  
423 - | "atr1" -> Atr1  
424 - | "ratr1" -> Ratr1  
425 - | s -> failwith ("parse_restr: " ^ s)  
426 -  
427 -  
428 -let parse_comp = function  
429 - "int" -> Int,[]  
430 - | "rel" -> Rel,[]  
431 - | "co" -> CompTypeUndef,[Comp "co"] (* subst qub prep comp *)  
432 - | "kto" -> CompTypeUndef,[Comp "kto"] (* subst *)  
433 - | "ile" -> CompTypeUndef,[Comp "ile"] (* num adv *)  
434 - | "jaki" -> CompTypeUndef,[Comp "jaki"] (* adj *)  
435 - | "który" -> CompTypeUndef,[Comp "który"] (* adj *)  
436 - | "czyj" -> CompTypeUndef,[Comp "czyj"] (* adj *)  
437 - | "jak" -> CompTypeUndef,[Comp "jak"] (* prep conj adv *)  
438 - | "kiedy" -> CompTypeUndef,[Comp "kiedy"] (* comp adv *)  
439 - | "gdzie" -> CompTypeUndef,[Comp "gdzie"] (* qub adv *)  
440 - | "odkąd" -> CompTypeUndef,[Comp "odkąd"] (* adv *)  
441 - | "skąd" -> CompTypeUndef,[Comp "skąd"] (* adv *)  
442 - | "dokąd" -> CompTypeUndef,[Comp "dokąd"] (* adv *)  
443 - | "którędy" -> CompTypeUndef,[Comp "którędy"] (* adv *)  
444 - | "dlaczego" -> CompTypeUndef,[Comp "dlaczego"] (* adv *)  
445 - | "czemu" -> CompTypeUndef,[Comp "czemu"] (* adv *)  
446 - | "czy" -> CompTypeUndef,[Comp "czy"] (* qub conj *)  
447 - | "jakby" -> CompTypeUndef,[Comp "jakby"] (* qub comp *)  
448 - | "jakoby" -> CompTypeUndef,[Comp "jakoby"] (* qub comp *)  
449 - | "gdy" -> CompTypeUndef,[Gdy] (* adv; gdyby: qub comp *)  
450 - | "dopóki" -> CompTypeUndef,[Comp "dopóki"] (* comp *)  
451 - | "zanim" -> CompTypeUndef,[Comp "zanim"] (* comp *)  
452 - | "jeśli" -> CompTypeUndef,[Comp "jeśli"] (* comp *)  
453 - | "żeby2" -> CompTypeUndef,[Zeby]  
454 - | "żeby" -> CompTypeUndef,[Comp "żeby"] (* qub comp *)  
455 - | "że" -> CompTypeUndef,[Comp "że"] (* qub comp *)  
456 - | "aż" -> CompTypeUndef,[Comp "aż"] (* qub comp *)  
457 - | "bo" -> CompTypeUndef,[Comp "bo"] (* qub comp *)  
458 - | s -> failwith ("parse_comp: " ^ s)  
459 -  
460 -let load_type_constrains = function  
461 - | Symbol value ->  
462 - (match parse_comp value with  
463 - CompTypeUndef,[c] -> c  
464 - | _ -> failwith "load_type_constrains")  
465 - | xml -> failwith ("load_type_constrains:\n " ^ tei_to_string xml)  
466 -  
467 -let load_ctype = function  
468 - | F("type",Fs("type_def", x)) ->  
469 - (match x with  
470 - | [F("conjunction",Symbol value)] -> parse_comp value  
471 - | [F("conjunction",Symbol value);Fset("constraints",set)] ->  
472 - (match parse_comp value with  
473 - CompTypeUndef, _ -> failwith "load_ctype"  
474 - | ctype,[] -> ctype, List.rev (Xlist.rev_map set load_type_constrains)  
475 - | _ -> failwith "load_ctype")  
476 - | l -> failwith ("load_ctype 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string)))  
477 - | xml -> failwith ("load_ctype:\n " ^ tei_to_string xml)  
478 -  
479 -let load_lemmas_set = function  
480 - | TEIstring mstring -> mstring  
481 - | xml -> failwith ("load_lemmas_set:\n " ^ tei_to_string xml)  
482 -  
483 -let check_lemma s =  
484 - match Str.full_split (Str.regexp "(\\|)") s with  
485 - [Str.Text s] -> Lexeme s  
486 - | [Str.Text "E"; Str.Delim "("; Str.Text g; Str.Delim ")"] -> Elexeme(parse_gender g)  
487 - | _ -> failwith "check_lemma"  
488 -  
489 -let make_lemma = function  
490 - | _,_,[lemma] -> check_lemma lemma  
491 - | "XOR","concat",lemmas -> XOR(Xlist.map lemmas check_lemma)  
492 - | "OR","coord",lemmas -> ORcoord(Xlist.map lemmas check_lemma)  
493 - | "OR","concat",lemmas -> ORconcat(Xlist.map lemmas check_lemma)  
494 - | _ -> failwith "make_lemma"  
495 -  
496 -let process_lex_phrase lemma = function  
497 - NP(case),number,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [SUBST(number,case),lemma]  
498 - | PrepNP(prep,case),number,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;SUBST(number,case),lemma]  
499 - | AdjP(case),number,gender,grad,NegationUndef,ReflUndef -> [ADJ(number,case,gender,grad),lemma]  
500 - | PrepAdjP(prep,case),number,gender,grad,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;ADJ(number,case,gender,grad),lemma]  
501 - | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,ReflTrue -> [INF(aspect,negation),lemma;QUB,Lexeme "się"]  
502 - | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation),lemma]  
503 - | PpasP(case),number,gender,GradUndef,negation,ReflUndef -> [PPAS(number,case,gender,AspectUndef,negation),lemma]  
504 - | PrepPpasP(prep,case),number,gender,GradUndef,negation,ReflUndef -> [PREP case,Lexeme prep;PPAS(number,case,gender,AspectUndef,negation),lemma]  
505 - | PactP(case),number,gender,GradUndef,negation,ReflTrue -> [PACT(number,case,gender,AspectUndef,negation),lemma;QUB,Lexeme "się"]  
506 - | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation),lemma]  
507 - | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,ReflTrue -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma;QUB,Lexeme "się"]  
508 - | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation),lemma]  
509 - | Qub,NumberUndef,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [QUB,lemma]  
510 - | AdvP(mode),NumberUndef,GenderUndef,grad,NegationUndef,ReflUndef -> [ADV grad,lemma]  
511 - | phrase,number,gender,grad,negation,reflex ->  
512 - Printf.printf "%s %s %s %s %s %s\n" (ENIAMwalStringOf.phrase phrase) (ENIAMwalStringOf.number number)  
513 - (ENIAMwalStringOf.gender gender) (ENIAMwalStringOf.grad grad) (ENIAMwalStringOf.negation negation) (ENIAMwalStringOf.refl reflex); []  
514 -  
515 -let new_schema r cr ce morfs =  
516 - {psn_id=(-1); gf=r; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=cr; ce=ce; morfs=morfs}  
517 -  
518 -let rec process_lex lex = function  
519 - | Phrase(ComparP prep),arguments,Lexeme "",Lexeme "" ->  
520 - LexPhrase([COMPAR,Lexeme prep],(Ratrs,Xlist.map arguments (fun morf -> new_schema ARG [] [] [morf])))  
521 - | PhraseAbbr(Xp mode,[argument]),_,_,_ ->  
522 - let lex = {lex with lex_argument=argument; lex_mode=mode :: lex.lex_mode} in  
523 - process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma)  
524 - (* | PhraseAbbr(Advp mode,[]),[],lemma,Lexeme "" ->  
525 - let poss = process_lex_phrase lemma (AdvP,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in  
526 - LexPhrase(poss,lex.lex_modification) *)  
527 - | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)  
528 - | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)  
529 - | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" ->  
530 - if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification)  
531 - else LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation),lemma],lex.lex_modification)  
532 - | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" ->  
533 - if lex.lex_reflex = ReflTrue then LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma;QUB,Lexeme "się"],lex.lex_modification)  
534 - else LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation),lemma],lex.lex_modification)  
535 - | Phrase phrase,[],lemma,Lexeme "" ->  
536 - let poss = process_lex_phrase lemma (phrase,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in  
537 - LexPhrase(poss,lex.lex_modification)  
538 - | (argument,arguments,lemma,numeral_lemma) ->  
539 - let s = Printf.sprintf "%s [%s] %s %s\n" (ENIAMwalStringOf.morf argument)  
540 - (String.concat ";" (Xlist.map arguments ENIAMwalStringOf.morf))  
541 - (ENIAMwalStringOf.lex lemma) (ENIAMwalStringOf.lex numeral_lemma) in  
542 - failwith ("process_lex: " ^ s)  
543 -  
544 -(* UWAGA: refl_id może się zmienić wraz z wersją Walentego *)  
545 -let refl_id = 25  
546 -let refl_position = {empty_position with role="Lemma"; mode=["lemma"]; morfs=[MorfId refl_id]}  
547 -  
548 -let rec load_category = function  
549 - | F("category",Fs("category_def",x)) ->  
550 - (match x with  
551 - | [F("name",Symbol value)] -> value, []  
552 - | [F("name",Symbol value);Fset("constraints",set)] ->  
553 - value, List.rev (Xlist.rev_map set (load_phrase (ref [])))  
554 - | l -> failwith ("load_category 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string)))  
555 - | xml -> failwith ("load_category:\n " ^ tei_to_string xml)  
556 -  
557 -and load_modification_def = function (*pomocnicza do load_lex *)  
558 - | [F("type",Symbol value)] -> parse_restr value, []  
559 - | [F("type",Symbol value); Fset("positions",set)] ->  
560 - parse_restr value, List.rev (Xlist.rev_map set (load_position (-1) (-1) (ref IntMap.empty)))  
561 - | x -> Printf.printf "%s\n" (tei_to_string (List.hd x));  
562 - failwith "load_modification_def:\n"  
563 -  
564 -and load_lex arg xml = match xml with  
565 - | F("argument",set) ->  
566 - let mode = ref [] in  
567 - let a = load_phrase mode set in  
568 - {arg with lex_argument = a; lex_mode = !mode}  
569 - | Fset("arguments",set) ->  
570 - {arg with lex_arguments=List.rev (Xlist.rev_map set (load_phrase (ref [])))}  
571 - | F("modification",Fs("modification_def",x)) -> {arg with lex_modification = load_modification_def x}  
572 - | F("lemma",Fs("lemma_def",[F("selection_mode",Symbol value1);  
573 - F("cooccurrence",Symbol value2);  
574 - Fset("lemmas",lemmas)])) ->  
575 - {arg with lex_lemma = make_lemma (value1, value2, List.rev (Xlist.rev_map lemmas load_lemmas_set))}  
576 - | F("numeral_lemma",Fs("numeral_lemma_def",[F("selection_mode",Symbol value1);  
577 - F("cooccurrence",Symbol value2);  
578 - Fset("lemmas",lemmas)])) ->  
579 - {arg with lex_numeral_lemma = make_lemma (value1, value2, List.rev (Xlist.rev_map lemmas load_lemmas_set))}  
580 - | F("negation",Symbol value) -> {arg with lex_negation = parse_negation value}  
581 - | F("degree",Symbol value) -> {arg with lex_degree = parse_grad value}  
582 - | F("number",Symbol value) -> {arg with lex_number = parse_number value}  
583 - | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue}  
584 - | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse}  
585 - | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty}  
586 - | Fset("gender",[Symbol value]) -> {arg with lex_gender = parse_gender value}  
587 - | xml ->  
588 - Printf.printf "%s\n" (tei_to_string xml);  
589 - failwith "load_lex:\n "  
590 -  
591 -and load_phrase mode = function  
592 - | Fs("np",[F("case",Symbol a)]) -> Phrase (NP(parse_case a));  
593 - | Fs("prepnp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepNP(a, parse_case b))  
594 - | Fs("adjp", [F("case",Symbol a)]) -> Phrase (AdjP(parse_case a))  
595 - | Fs("prepadjp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepAdjP(a, parse_case b))  
596 - | Fs("comprepnp", [e;F("complex_preposition",TEIstring a)]) -> Phrase (ComprepNP(a))  
597 - | Fs("comprepnp", [F("complex_preposition",TEIstring a)]) -> Phrase (ComprepNP(a))  
598 - | Fs("cp", [a]) -> PhraseComp(Cp,load_ctype a)  
599 - | Fs("ncp", [F("case",Symbol a);b]) -> PhraseComp(Ncp(parse_case a),load_ctype b)  
600 - | Fs("prepncp", [F("preposition",Symbol a);F("case",Symbol b);c]) -> PhraseComp(Prepncp(a, parse_case b),load_ctype c)  
601 - | Fs("infp", [F("aspect",Symbol a)]) -> Phrase (InfP(parse_aspect a))  
602 - | Fs("xp", [a]) -> let x,y = load_category a in mode:=x :: !mode; PhraseAbbr(Xp x,y)  
603 - | Fs("xp", [e;a]) -> let x,y = load_category a in mode:=x :: !mode; PhraseAbbr(Xp x,y)  
604 - | Fs("advp", [F("category",Symbol a)]) -> mode:=a :: !mode; Phrase(AdvP(a))  
605 - | Fs("advp", [e;F("category",Symbol a)]) -> mode:=a :: !mode; Phrase(AdvP(a))  
606 - | Fs("nonch", []) -> mode:="nonch" :: !mode; PhraseAbbr(Nonch,[])  
607 - | Fs("or", []) -> Phrase Or  
608 - | Fs("refl", []) -> mode:="refl" :: !mode; LexPhrase([QUB,Lexeme "się"],(Natr,[]))  
609 - | Fs("E", []) -> E Null  
610 - | Fs("lex", x) ->  
611 - let lex = Xlist.fold x empty_lex load_lex in  
612 - mode := lex.lex_mode @ !mode;  
613 - process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma)  
614 - | Fs("fixed", [F("argument",a);F("string",TEIstring b)]) -> Phrase (FixedP((*snd (load_phrase a),*)b))  
615 - | Fs("possp", [e]) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[])  
616 - | Fs("possp", []) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[])  
617 - | Fs("recip", []) -> mode:="recip" :: !mode; LexPhrase([QUB,Lexeme "się"],(Natr,[]))  
618 - | Fs("distrp", [e]) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[])  
619 - | Fs("distrp", []) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[])  
620 - | Fs("compar", [F("compar_category",Symbol value)]) -> Phrase(ComparP value)  
621 - | Fs("gerp", [F("case",Symbol a)]) -> Phrase (GerP(parse_case a))  
622 - | Fs("prepgerp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepGerP(a, parse_case b))  
623 - | Fs("nump", [F("case",Symbol a)]) -> Phrase (NumP(parse_case a))  
624 - | Fs("prepnump", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepNumP(a, parse_case b))  
625 - | Fs("ppasp", [F("case",Symbol a)]) -> Phrase (PpasP(parse_case a))  
626 - | Fs("prepppasp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepPpasP(a, parse_case b))  
627 - | Fs("qub", []) -> Phrase Qub  
628 - | Fs("pactp", [F("case",Symbol a)]) -> Phrase (PactP(parse_case a))  
629 - | Fs("adverb",[F("adverb",Symbol s)]) -> LexPhrase([ADV (Grad "pos"),Lexeme s],(Natr,[]))  
630 - | xml -> failwith ("load_phrase match:\n " ^ tei_to_string xml)  
631 -  
632 -and load_phrase_id ent sch psn phrases mode = function  
633 - | Fs(morf,Id{hash=false; numbers=[ent_id;sch_id;psn_id;id]; suffix="phr"} :: l) ->  
634 - if ent_id = ent && sch_id = sch && psn_id = psn then  
635 - let morf = load_phrase mode (Fs(morf, l)) in  
636 - phrases := IntMap.add_inc (!phrases) id morf (fun morf2 -> if morf = morf2 then morf else failwith "load_phrase_id");  
637 - MorfId id  
638 - else failwith (Printf.sprintf "load_phrase %d %d" ent ent_id)  
639 - | Fs(morf, l) -> load_phrase mode (Fs(morf, l))  
640 - | _ -> failwith "load_phrase_id"  
641 -  
642 -  
643 -and load_control arg = function  
644 - | Symbol value -> parse_control arg value  
645 - | xml -> failwith ("load_control:\n " ^ tei_to_string xml)  
646 -  
647 -and load_position_info ent sch phrases arg = function  
648 - | F("function",Symbol value) -> {arg with gf = parse_gf value}  
649 - | Fset("phrases",phrases_set) ->  
650 - let mode = ref [] in  
651 - let morfs = List.rev (Xlist.rev_map phrases_set (load_phrase_id ent sch arg.psn_id phrases mode)) in  
652 - {arg with morfs = morfs; mode = StringSet.to_list (StringSet.of_list (!mode))}  
653 - | Fset("control",control_set) -> Xlist.fold control_set arg load_control  
654 - | Id{hash=false; numbers=[ent_id;sch_id;id]; suffix="psn"} ->  
655 - if ent_id = ent && sch_id = sch then {arg with psn_id = id}  
656 - else failwith (Printf.sprintf "load_position_info %d %d" ent ent_id)  
657 - | xml -> failwith ("load_position_info:\n " ^ tei_to_string xml)  
658 -  
659 -and load_position ent sch phrases = function  
660 - | Fs("position", listt) ->  
661 - Xlist.fold listt empty_position (load_position_info ent sch phrases)  
662 - | xml -> failwith ("load_position:\n " ^ tei_to_string xml)  
663 -  
664 -let parse_opinion = function  
665 - "cer" -> Pewny  
666 - | "col" -> Potoczny  
667 - | "unc" -> Watpliwy  
668 - | "dat" -> Archaiczny  
669 - | "bad" -> Zly  
670 - | "vul" -> Wulgarny  
671 - | "unk" -> Nieokreslony  
672 - | "met" -> Metaforyczny  
673 - | "dom" -> Dziedzinowy  
674 - | "rar" -> Sporadyczny  
675 - | "wątpliwy" -> Watpliwy  
676 - | "dobry" -> Pewny  
677 - | "zły" -> Zly  
678 - | x -> failwith ("parse_opinion: " ^ x)  
679 -  
680 -let load_schema_info ent phrases (arg:schema) = function  
681 - | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}  
682 - | F("inherent_sie",Binary b) -> {arg with reflexiveMark = b}  
683 - | F("aspect",Symbol aspect_value) -> {arg with aspect = parse_aspect aspect_value}  
684 - | Fset("aspect", []) -> arg  
685 - | F("negativity",Symbol negativity_value) -> {arg with negativity = parse_negation negativity_value}  
686 - | Fset("negativity",[]) -> arg  
687 - | F("predicativity",Binary true) -> {arg with predicativity = PredTrue}  
688 - | F("predicativity",Binary false) -> {arg with predicativity = PredFalse}  
689 - | Fset("positions", positions) ->  
690 - {arg with positions = List.rev (Xlist.rev_map positions (load_position ent arg.sch_id phrases))}  
691 - | F("text_rep",TEIstring text_rep) -> {arg with text_rep = text_rep}  
692 - | Id{hash=false; numbers=[ent_id;id]; suffix="sch"} -> if ent_id = ent then {arg with sch_id = id} else failwith (Printf.sprintf "load_schema_info %d %d" ent ent_id)  
693 - | xml -> failwith ("load_schema_info\n " ^ tei_to_string xml)  
694 -  
695 -let load_schema ent phrases = function  
696 - Fs("schema", schema) ->  
697 - let result = {sch_id = (-1); opinion = OpinionUndef; reflexiveMark = false; aspect = AspectUndef;  
698 - negativity = NegationUndef; predicativity = PredUndef; positions = []; text_rep=""} in  
699 - let result = Xlist.fold schema result (load_schema_info ent phrases) in  
700 - result  
701 - | xml -> failwith ("load_schema:\n " ^ tei_to_string xml)  
702 -  
703 -let load_phrases_set ent = function  
704 - | SameAs({hash=true; numbers=[ent_id;sch_id;psn_id;phr_id]; suffix="phr"},"phrase") ->  
705 - if ent_id <> ent then failwith (Printf.sprintf "load_phrases_set %d %d" ent ent_id) else  
706 - sch_id,psn_id,phr_id  
707 - | xml -> failwith ("load_phrases_set :\n " ^ tei_to_string xml)  
708 -  
709 -let load_example_info ent arg = function  
710 - | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) ->  
711 - if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)  
712 - | Fset("phrases",phrases_set) ->  
713 - {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))}  
714 - | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string}  
715 - | F("source",Symbol source_value) -> {arg with source = source_value}  
716 - | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}  
717 - | F("note",TEIstring note_string) -> {arg with note = note_string}  
718 - | Id{hash=false; numbers=[ent_id;id]; suffix="exm"} -> if ent_id = ent then {arg with exm_id = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)  
719 - | xml -> failwith ("load_example_info: \n " ^ tei_to_string xml)  
720 -  
(** [load_example ent tei] builds an example record from an
    [Fs("example", _)] node by folding its features with
    [load_example_info ent] into a record of placeholder values
    (ids set to -1, empty strings, undefined opinion).
    @raise Failure if [tei] is not an example feature structure. *)
let load_example ent tei =
  match tei with
  | Fs("example", features) ->
      let blank_example = {
        exm_id = (-1); meaning = (-1); phrases = []; sentence = "";
        source = ""; opinion = OpinionUndef; note = "";
      } in
      List.fold_left (load_example_info ent) blank_example features
  | other -> failwith ("load_example: \n " ^ tei_to_string other)
728 -  
(** [load_self_prefs_sets name ent frm tei] decodes one member of the
    selectional-preference set called [name]:
    - a numeric value is accepted only in the "synsets" set,
    - a symbol only in the "predefs" set,
    - a "relation" structure only in the "relations" set, and only when its
      argument reference carries the entry number [ent] and frame number [frm].
    @raise Failure when the member does not fit the set or is unrecognised. *)
let load_self_prefs_sets name ent frm tei =
  match tei with
  | Numeric n ->
      if name <> "synsets" then failwith "load_self_prefs_sets" else SynsetId n
  | Symbol s ->
      if name <> "predefs" then failwith "load_self_prefs_sets" else Predef s
  | Fs("relation",[F("type",Symbol rel);F("to",SameAs({hash=true; numbers=[owner;frame;target]; suffix="arg"}, "argument"))]) ->
      if owner = ent && frame = frm && name = "relations" then RelationArgId(rel,target)
      else failwith (Printf.sprintf "load_self_prefs_sets %d %d" ent owner)
  | other -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string other)
736 -  
(** [load_argument_self_prefs ent frm tei] decodes every member of a named
    feature set with [load_self_prefs_sets], passing the set name along, and
    returns the results in the order of the set.
    @raise Failure if [tei] is not a feature set. *)
let load_argument_self_prefs ent frm tei =
  match tei with
  | Fset(name, members) ->
      let rev_prefs = List.rev_map (load_self_prefs_sets name ent frm) members in
      List.rev rev_prefs
  | other -> failwith ("load_argument_self_prefs: \n " ^ tei_to_string other)
741 -  
(** [load_argument_info ent frm argument tei] returns [argument] updated with
    the feature carried by [tei]: role, role attribute, the flattened list of
    selectional preferences, or the argument identifier. The identifier must
    carry entry number [ent] and frame number [frm].
    @raise Failure on an identifier mismatch or on an unrecognised node. *)
let load_argument_info ent frm argument tei =
  match tei with
  | F("role",Symbol v) -> {argument with role = v}
  | F("role_attribute",Symbol v) -> {argument with role_attribute = v}
  | F("sel_prefs",Fs("sel_prefs_groups", groups)) ->
      (* Each group yields a list; the groups are concatenated in order. *)
      let rev_groups = List.rev_map (load_argument_self_prefs ent frm) groups in
      {argument with sel_prefs = List.flatten (List.rev rev_groups)}
  (* | Id id -> {arg with arg_id = id} *)
  | Id{hash=false; numbers=[owner;frame;id]; suffix="arg"} ->
      if owner <> ent || frame <> frm
      then failwith (Printf.sprintf "load_argument_info %d %d" ent owner)
      else {argument with arg_id = id}
  | other -> failwith ("load_argument_info :\n " ^ tei_to_string other)
752 -  
(** [load_arguments_set ent frm tei] builds an argument record from an
    [Fs("argument", _)] node by folding its features with
    [load_argument_info ent frm] into a record of placeholder values
    ([arg_id = -1], empty role strings, no preferences).
    @raise Failure if [tei] is not an argument feature structure. *)
let load_arguments_set ent frm tei =
  match tei with
  | Fs("argument", features) ->
      let blank_argument = {arg_id = (-1); role = ""; role_attribute = ""; sel_prefs = []} in
      List.fold_left (load_argument_info ent frm) blank_argument features
  | other -> failwith ("load_arguments_set :\n " ^ tei_to_string other)
759 -  
(** [load_meanings_set ent tei] decodes a hashed two-number reference with
    suffix "mng" pointing at a "lexical_unit" and returns its second number.
    @raise Failure if the first number differs from [ent] or if [tei] is not
    such a reference. *)
let load_meanings_set ent tei =
  match tei with
  | SameAs({hash=true; numbers=[owner;mng]; suffix="mng"},"lexical_unit") ->
      if owner <> ent then failwith (Printf.sprintf "load_meanings_set %d %d" ent owner)
      else mng
  | other -> failwith ("load_meanings_set :\n " ^ tei_to_string other)
764 -  
(** [load_frame ent tei] builds a frame record from an [Fs("frame", _)] node
    whose children are, in this exact order: the frame identifier, the
    opinion, the set of meaning references and the set of arguments.
    Meanings are decoded with [load_meanings_set] and arguments with
    [load_arguments_set], both in document order.
    @raise Failure if the identifier does not carry entry number [ent] or if
    [tei] does not have the expected shape. *)
let load_frame ent tei =
  match tei with
  | Fs("frame",[
      Id{hash=false; numbers=[owner;frame_no]; suffix="frm"};
      F("opinion",Symbol opinion_tag);
      Fset("meanings",meaning_refs);
      Fset("arguments",argument_nodes)]) ->
      if owner = ent then
        {frm_id = frame_no;
         opinion = parse_opinion opinion_tag;
         meanings = List.rev (List.rev_map (load_meanings_set ent) meaning_refs);
         arguments = List.rev (List.rev_map (load_arguments_set ent frame_no) argument_nodes)}
      else failwith (Printf.sprintf "load_frame %d %d" ent owner)
  | other -> failwith ("load_frame :\n " ^ tei_to_string other)
777 -  
(** [load_meaning_info ent meaning tei] returns [meaning] updated with the
    feature carried by [tei]: name, variant, plwnluid number, gloss, or the
    meaning identifier (which must carry entry number [ent]).
    @raise Failure on an identifier mismatch or on an unrecognised node. *)
let load_meaning_info ent meaning tei =
  match tei with
  | F("name",TEIstring text) -> {meaning with name = text}
  | F("variant",TEIstring text) -> {meaning with variant = text}
  | F("plwnluid",Numeric n) -> {meaning with plwnluid = n}
  | F("gloss",TEIstring text) -> {meaning with gloss = text}
  | Id{hash=false; numbers=[owner;mng]; suffix="mng"} ->
      if owner <> ent then failwith (Printf.sprintf "load_meaning_info %d %d" ent owner)
      else {meaning with mng_id = mng}
  | other -> failwith ("load_meaning_info:\n " ^ tei_to_string other)
785 -  
786 -  
(** [load_meaning ent tei] builds a meaning record from an
    [Fs("lexical_unit", _)] node by folding its features with
    [load_meaning_info ent], starting from [empty_meaning].
    @raise Failure if [tei] is not a lexical-unit feature structure. *)
let load_meaning ent tei =
  match tei with
  | Fs("lexical_unit", features) ->
      List.fold_left (load_meaning_info ent) empty_meaning features
  | other -> failwith ("load_meaning:\n " ^ tei_to_string other)
791 -  
(** [load_alter_connection ent tei] decodes an [Fs("connection", _)] node
    made of an argument reference followed by a set of phrase references.
    It returns [(sch_id, frm_id, connection)], where [connection] pairs the
    argument number with its phrase numbers grouped by position number.
    All phrase references must point into one and the same schema.
    @raise Failure if the argument reference does not carry entry number
    [ent], if the phrases span a number of schemata other than one, or if
    [tei] does not have the expected shape. *)
let load_alter_connection ent tei =
  match tei with
  | Fs("connection", [
      F("argument",SameAs({hash=true; numbers=[owner;frm_id;arg_id]; suffix="arg"},"argument"));
      Fset("phrases",phrase_refs)]) ->
      if owner <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent owner) else
      (* Accumulate phrase numbers per position and the set of schemata seen. *)
      let collect (by_position, schemata) phrase_ref =
        let sch_id, psn_id, phr_id = load_phrases_set ent phrase_ref in
        let by_position = IntMap.add_inc by_position psn_id [phr_id] (fun l -> phr_id :: l) in
        by_position, IntSet.add schemata sch_id in
      let by_position, schemata =
        List.fold_left collect (IntMap.empty, IntSet.empty) phrase_refs in
      let n_schemata = IntSet.size schemata in
      if n_schemata <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" n_schemata) else
      let grouped = IntMap.fold by_position [] (fun acc psn phrs -> (psn, phrs) :: acc) in
      IntSet.min_elt schemata, frm_id, {argument = arg_id; phrases = grouped}
  | other -> failwith ("load_alter_connections: \n " ^ tei_to_string other)
805 -  
(** [load_alternations ent tei] decodes an [Fs("alternation", _)] node that
    holds a single set of connections. Every connection is decoded with
    [load_alter_connection ent]; all of them must refer to exactly one schema
    and exactly one frame. The result records that schema number, that frame
    number and the connections in document order.
    @raise Failure if the connections refer to a number of schemata or frames
    other than one, or if [tei] does not have the expected shape. *)
let load_alternations ent = function
  | Fs("alternation",[Fset("connections",connections_set)]) ->
      let conns,sch_set,frm_set =
        Xlist.fold connections_set ([],IntSet.empty,IntSet.empty) (fun (conns,sch_set,frm_set) conn ->
          let sch_id,frm_id,conn = load_alter_connection ent conn in
          conn :: conns, IntSet.add sch_set sch_id, IntSet.add frm_set frm_id) in
      if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alternations: |sch_set|=%d" (IntSet.size sch_set)) else
      (* Report the size of the frame set here; the schema set is known to
         have size 1 at this point, so printing it would always show 1. *)
      if IntSet.size frm_set <> 1 then failwith (Printf.sprintf "load_alternations: |frm_set|=%d" (IntSet.size frm_set)) else
      (* conns was accumulated in reverse, so restore document order. *)
      {schema=IntSet.min_elt sch_set; frame=IntSet.min_elt frm_set; connections=List.rev conns}
  | xml -> failwith ("load_alternations: \n " ^ tei_to_string xml)
815 -  
(** [load_entry phrases xml] decodes one [entry] XML element into an entry
    record. The [xml:id] attribute must parse to a non-hashed single-number
    identifier with suffix "ent"; that number becomes [ent_id] and is passed
    to every layer loader. Children are folded into [empty_entry]:
    - a [form] element supplies [form_orth] and [form_pos],
    - any other child is parsed with [parse_tei] and must be one of the
      syntactic, examples, semantic, meanings or connections layers, or the
      general info structure carrying the status.
    [phrases] is handed unchanged to [load_schema].
    @raise Failure if the identifier is malformed, if a child is not
    recognised, or if [xml] is not an [entry] element with a single
    [xml:id] attribute. *)
let load_entry phrases = function
  | Xml.Element("entry",["xml:id",raw_id], l) ->
      (* print_endline raw_id; *)
      let id = match parse_id raw_id with
        | {hash=false; numbers=[n]; suffix="ent"} -> n
        (* Name this function and the offending id in the failure. *)
        | _ -> failwith ("load_entry: unexpected entry id " ^ raw_id) in
      let entry = {empty_entry with ent_id = id} in
      Xlist.fold l entry (fun e -> function
        | Xml.Element("form", [], [
            Xml.Element("orth",[],[Xml.PCData orth]);
            Xml.Element("pos",[],[Xml.PCData pos])]) -> (*print_endline orth;*) {e with form_orth=orth; form_pos=pos}
        | xml -> (match parse_tei xml with
          | Fs("syntactic_layer", [Fset("schemata",schemata_set)]) -> {e with schemata = List.rev (Xlist.rev_map schemata_set (load_schema id phrases))}
          | Fs("examples_layer", [Fset("examples",examples_set)]) -> {e with examples = List.rev (Xlist.rev_map examples_set (load_example id))}
          | Fs("semantic_layer", [Fset("frames",frame_set)]) -> {e with frames = List.rev (Xlist.rev_map frame_set (load_frame id))}
          | Fs("meanings_layer", [Fset("meanings",meanings_set)]) -> {e with meanings = List.rev (Xlist.rev_map meanings_set (load_meaning id))}
          | Fs("connections_layer",[Fset("alternations",alternations)]) -> {e with alternations = List.rev (Xlist.rev_map alternations (load_alternations id))}
          | Fs("general_info",[F("status",TEIstring status)]) -> {e with status=status}
          | xml -> failwith ("load_entry: \n" ^ tei_to_string xml)))
  | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
836 -  
(** [load_walenty filename] parses the XML file [filename], which must be a
    [TEI] element containing a [teiHeader] followed by a [text] element with
    a single [body]. Every child of the body is decoded with [load_entry],
    all sharing one reference cell that starts as an empty map. Returns the
    entries in document order together with the final content of that cell.
    @raise Failure if the document does not have the expected top-level
    shape. *)
let load_walenty filename =
  match Xml.parse_file filename with
  | Xml.Element("TEI", _,
      [Xml.Element("teiHeader",_,_);
       Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
      let phrases = ref IntMap.empty in
      let rev_entries = List.rev_map (load_entry phrases) entries in
      let walenty = List.rev rev_entries in
      (* The cell is read only after all entries have been loaded. *)
      walenty, !phrases
  | _ -> failwith "load_walenty"
848 -  
(** [correct_expansion positions] rewrites a two-position expansion into a
    list of phrases. Two shapes are accepted, both with a first position that
    has function [ARG], no control markers and a single phrase:
    - a fixed phrase: yields one lexicalised [FIXED] phrase that takes the
      second position as its [Ratr] dependent;
    - the lexeme "własny" without dependents, followed by a position with
      exactly two phrases: yields those two phrases plus the "własny" phrase
      taking the second position as its [Atr] dependent.
    @raise Failure for any other input. *)
let correct_expansion positions =
  match positions with
  | [{gf=ARG; cr=[]; ce=[]; morfs=[Phrase(FixedP fixed)]}; dependent] ->
      [LexPhrase([FIXED,Lexeme fixed],(Ratr,[dependent]))]
  | [{gf=ARG; cr=[]; ce=[]; morfs=[LexPhrase([pos,Lexeme "własny"],(Natr,[]))]};
     ({morfs=[a;b]} as dependent)] ->
      [a; b; LexPhrase([pos,Lexeme "własny"],(Atr,[dependent]))]
  | _ -> failwith "correct_expansion"
854 -  
(** [load_expansion tei] decodes an [Fs("expansion", _)] node made of an
    opinion (ignored) followed by either
    - a set holding exactly one phrase, decoded with [load_phrase], or
    - a set of positions, decoded with [load_position] and then rewritten by
      [correct_expansion].
    Returns the resulting list of phrases.
    @raise Failure if [tei] has any other shape. *)
let load_expansion tei =
  match tei with
  | Fs("expansion",[F("opinion",Symbol _);Fset("phrases",[phrase])]) ->
      [load_phrase (ref []) phrase]
  | Fs("expansion",[F("opinion",Symbol _);Fset("positions",position_nodes)]) ->
      (* One loader, hence one fresh map cell, is shared by all positions. *)
      let loader = load_position (-1) (-1) (ref IntMap.empty) in
      let rev_positions = List.rev_map loader position_nodes in
      correct_expansion (List.rev rev_positions)
  | other -> failwith ("load_expansion: \n" ^ tei_to_string other)
859 -  
(** [load_rentry xml] decodes one [entry] element of the expansions file.
    The element must have a single [xml:id] attribute, parsing to a
    non-hashed single-number identifier with suffix "exp", and exactly two
    children: the phrase being expanded and its expansions. The expansions
    are given either as a set (each member decoded with [load_expansion] and
    the results concatenated in order) or as a single expansion.
    Returns [(id, morf, expansions)].
    @raise Failure if the identifier is malformed, if the second child is not
    a [phrase_type_expansions] structure, or if [xml] is not such an entry. *)
let load_rentry = function
  | Xml.Element("entry",["xml:id",raw_id], [phrase;exp]) ->
      let id = match parse_id raw_id with
        | {hash=false; numbers=[n]; suffix="exp"} -> n
        (* Failures below name this function rather than a sibling loader. *)
        | _ -> failwith ("load_rentry: unexpected entry id " ^ raw_id) in
      let morf = load_phrase (ref []) (parse_tei phrase) in
      let expansions = match parse_tei exp with
        | Fs("phrase_type_expansions", [Fset("expansions",expansions)]) -> List.flatten (List.rev (Xlist.rev_map expansions load_expansion))
        | Fs("phrase_type_expansions", [F("expansions",expansion)]) -> load_expansion expansion
        | tei -> failwith ("load_rentry: \n" ^ tei_to_string tei) in
      id,morf,expansions
  | xml -> failwith ("load_rentry: \n" ^ Xml.to_string_fmt xml)
872 -  
(** Hand-written expansion entries, in the same [(id, phrase, expansions)]
    shape that [load_rentry] returns. They use the negative ids -2 and -3.
    - id -2 expands [PhraseAbbr(Nonch,[])] into the lexemes
      "co", "coś", "nic" and "to";
    - id -3 expands [Phrase (AdvP "pron")] into the lexemes "tak" and "jak". *)
let expands_supplement =
  (* Lexicalised substantive phrase without dependents. *)
  let nonch_lex lemma = LexPhrase([SUBST(NumberUndef,Str),Lexeme lemma],(Natr,[])) in
  (* Lexicalised positive-degree adverb phrase without dependents. *)
  let pron_adv_lex lemma = LexPhrase([ADV (Grad "pos"),Lexeme lemma],(Natr,[])) in
  [
    (-2), PhraseAbbr(Nonch,[]), List.map nonch_lex ["co"; "coś"; "nic"; "to"];
    (-3), Phrase (AdvP "pron"), List.map pron_adv_lex ["tak"; "jak"];
  ]
884 -  
(** [load_expands filename] parses the XML file [filename], which must be a
    [TEI] element containing a [teiHeader] followed by a [text] element with
    a single [body]. Every child of the body is decoded with [load_rentry].
    The result is [expands_supplement] followed by the decoded entries in
    document order.
    @raise Failure if the document does not have the expected top-level
    shape. *)
let load_expands filename =
  match Xml.parse_file filename with
  | Xml.Element("TEI", _,
      [Xml.Element("teiHeader",_,_) ;
       Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
      expands_supplement @ List.rev (Xlist.rev_map entries load_rentry)
  (* Name this function in the failure, not load_walenty. *)
  | _ -> failwith "load_expands"
894 -  
(** Association list from a subtype tag ("int" or "rel") to the list of
    words registered under that tag.
    NOTE(review): the tags presumably stand for interrogative and relative
    words; confirm against the code that consumes this table. *)
let subtypes =
  let int_words = [
    "co"; "czemu"; "czy"; "czyj"; "dlaczego"; "dokąd"; "gdzie"; "ile"; "jak";
    "jaki"; "kiedy"; "kto"; "którędy"; "który"; "odkąd"; "skąd"; "jakoby";
  ] in
  let rel_words = [
    "co"; "dokąd"; "gdzie"; "jak"; "jakby"; "jaki"; "jakoby"; "kiedy"; "kto";
    "którędy"; "który"; "odkąd"; "skąd";
  ] in
  ["int", int_words; "rel", rel_words]
902 -  
(** Association list from a word to the list of words recorded as its
    equivalents.
    NOTE(review): presumably interchangeable forms of the key; confirm
    against the code that consumes this table. *)
let equivs = [
  "jak", ["niczym"];
  "przeciw", ["przeciwko"];
]
904 -*)