Commit 4afde66aa00c8ecb39f904e7d806e7832f12d68e
1 parent: 44e8be87
Dostosowanie do nowego Walentego
Showing 14 changed files with 223 additions and 43 deletions
LCGlexicon/ENIAMcategoriesPL.ml
@@ -37,7 +37,7 @@ let selector_values = Xlist.fold [ | @@ -37,7 +37,7 @@ let selector_values = Xlist.fold [ | ||
37 | "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; | 37 | "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; |
38 | "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; | 38 | "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; |
39 | "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; | 39 | "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; |
40 | - "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; | 40 | + "imps";"pred";"aglt";"inf";"pcon";"pant";"pacta";"qub";"part";"comp";"conj";"interj"; |
41 | "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; | 41 | "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; |
42 | Pos2, []; | 42 | Pos2, []; |
43 | Cat, []; | 43 | Cat, []; |
@@ -413,6 +413,7 @@ let clarify_categories proper cat coerced (*snode*) = function | @@ -413,6 +413,7 @@ let clarify_categories proper cat coerced (*snode*) = function | ||
413 | | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | 413 | | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] |
414 | | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | 414 | | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] |
415 | | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] | 415 | | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] |
416 | + | lemma,"pacta",[] -> [{empty_cats with lemma=lemma; pos="pacta"; pos2="verb"; cat=cat; coerced=coerced; snode=snode}] | ||
416 | | lemma,"qub",[] -> | 417 | | lemma,"qub",[] -> |
417 | if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}] | 418 | if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}] |
418 | else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}] | 419 | else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}] |
@@ -662,6 +663,7 @@ let pos_categories = Xlist.fold [ | @@ -662,6 +663,7 @@ let pos_categories = Xlist.fold [ | ||
662 | "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | 663 | "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; |
663 | "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | 664 | "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; |
664 | "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; | 665 | "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; |
666 | + "pacta",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;]; | ||
665 | "qub",[Lemma;Cat;Role;SNode;]; | 667 | "qub",[Lemma;Cat;Role;SNode;]; |
666 | "part",[Lemma;SNode]; | 668 | "part",[Lemma;SNode]; |
667 | "comp",[Lemma;SNode;];(* ctype *) | 669 | "comp",[Lemma;SNode;];(* ctype *) |
LCGparser/ENIAM_LCGlatexOf.ml
@@ -213,7 +213,7 @@ let chart page text_fragments g = | @@ -213,7 +213,7 @@ let chart page text_fragments g = | ||
213 | String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes -> | 213 | String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes -> |
214 | IntMap.fold nodes l (fun l node1 contents -> | 214 | IntMap.fold nodes l (fun l node1 contents -> |
215 | Xlist.fold contents l (fun l (node2,symbol,sem) -> | 215 | Xlist.fold contents l (fun l (node2,symbol,sem) -> |
216 | - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | 216 | + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in |
217 | (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^ | 217 | (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^ |
218 | "\\end{longtable}" | 218 | "\\end{longtable}" |
219 | 219 | ||
@@ -221,7 +221,7 @@ let chart2 page text_fragments g = | @@ -221,7 +221,7 @@ let chart2 page text_fragments g = | ||
221 | let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in | 221 | let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in |
222 | "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^ | 222 | "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^ |
223 | String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> | 223 | String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> |
224 | - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | 224 | + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in |
225 | (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ | 225 | (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ |
226 | "\\end{longtable}" | 226 | "\\end{longtable}" |
227 | 227 |
semantics/ENIAMsemGraph.ml
@@ -570,7 +570,8 @@ let rec reduce_tree = function | @@ -570,7 +570,8 @@ let rec reduce_tree = function | ||
570 | (match reduce_tree c with | 570 | (match reduce_tree c with |
571 | Context c -> | 571 | Context c -> |
572 | let t,args = extract_aroles {t with arole=""} c.cx_contents in | 572 | let t,args = extract_aroles {t with arole=""} c.cx_contents in |
573 | - make_relation t (Context {c with cx_contents=args}) | 573 | + (*make_relation t (Context {c with cx_contents=args})*) (* FIXME: to trzeba poprawić tak by działało w obu wersjach parserów *) |
574 | + Relation(t.role,"",Context {c with cx_contents=args}) | ||
574 | | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c)))) | 575 | | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c)))) |
575 | | c -> ManageCoordination(t,c)) | 576 | | c -> ManageCoordination(t,c)) |
576 | | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree)) | 577 | | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree)) |
subsyntax/ENIAMsubsyntax.ml
@@ -325,7 +325,7 @@ let parse query = | @@ -325,7 +325,7 @@ let parse query = | ||
325 | let paths,_ = ENIAM_MWE.process paths in | 325 | let paths,_ = ENIAM_MWE.process paths in |
326 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a12"; *) | 326 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a12"; *) |
327 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) | 327 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) |
328 | - let paths = List.rev (Xlist.rev_map paths find_proper_names) in | 328 | + let paths = if !recognize_proper_names then List.rev (Xlist.rev_map paths find_proper_names) else paths in |
329 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a13"; *) | 329 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a13"; *) |
330 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) | 330 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) |
331 | let paths = modify_weights paths in | 331 | let paths = modify_weights paths in |
subsyntax/ENIAMsubsyntaxTypes.ml
@@ -46,6 +46,7 @@ type text = | @@ -46,6 +46,7 @@ type text = | ||
46 | | AltText of (mode * text) list | 46 | | AltText of (mode * text) list |
47 | 47 | ||
48 | let strong_disambiguate_flag = ref false | 48 | let strong_disambiguate_flag = ref false |
49 | +let recognize_proper_names = ref true | ||
49 | 50 | ||
50 | let data_path = | 51 | let data_path = |
51 | try Sys.getenv "ENIAM_USER_DATA_PATH" | 52 | try Sys.getenv "ENIAM_USER_DATA_PATH" |
subsyntax/interface.ml
@@ -43,6 +43,8 @@ let spec_list = [ | @@ -43,6 +43,8 @@ let spec_list = [ | ||
43 | "--no-internet-mode", Arg.Unit (fun () -> ENIAMtokenizerTypes.internet_mode:=false), "Strict attitude towards interpunction (default)"; | 43 | "--no-internet-mode", Arg.Unit (fun () -> ENIAMtokenizerTypes.internet_mode:=false), "Strict attitude towards interpunction (default)"; |
44 | "--par-names", Arg.Unit (fun () -> par_names:=true), "Identifiers of paragraphs provided"; | 44 | "--par-names", Arg.Unit (fun () -> par_names:=true), "Identifiers of paragraphs provided"; |
45 | "--no-par-names", Arg.Unit (fun () -> par_names:=false), "No identifiers of paragraphs provided (default)"; | 45 | "--no-par-names", Arg.Unit (fun () -> par_names:=false), "No identifiers of paragraphs provided (default)"; |
46 | + "--proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=true), "Recognize proper names (default)"; | ||
47 | + "--no-proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=false), "Do not recognize proper names"; | ||
46 | ] | 48 | ] |
47 | 49 | ||
48 | let usage_msg = | 50 | let usage_msg = |
walenty/ENIAMwalAnalyze.ml
@@ -93,7 +93,7 @@ let walenty_filename,expands_filename = | @@ -93,7 +93,7 @@ let walenty_filename,expands_filename = | ||
93 | (* "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml", | 93 | (* "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml", |
94 | "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" *) | 94 | "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" *) |
95 | 95 | ||
96 | -let _ = | 96 | +(*let _ = |
97 | let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in | 97 | let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in |
98 | let walenty = Xlist.rev_map walenty correct_walenty in | 98 | let walenty = Xlist.rev_map walenty correct_walenty in |
99 | let expands = ENIAMwalTEI.load_expands expands_filename in | 99 | let expands = ENIAMwalTEI.load_expands expands_filename in |
@@ -126,7 +126,7 @@ let _ = | @@ -126,7 +126,7 @@ let _ = | ||
126 | File.file_out "results/controll.tab" (fun file -> | 126 | File.file_out "results/controll.tab" (fun file -> |
127 | StringMap.iter cmap (fun s l -> | 127 | StringMap.iter cmap (fun s l -> |
128 | Printf.fprintf file "%d\t%s\t%s\n" (Xlist.size l) s (String.concat " " l))); | 128 | Printf.fprintf file "%d\t%s\t%s\n" (Xlist.size l) s (String.concat " " l))); |
129 | - () | 129 | + ()*) |
130 | 130 | ||
131 | (* Test unikalności indeksów sensów *) | 131 | (* Test unikalności indeksów sensów *) |
132 | (* let _ = | 132 | (* let _ = |
@@ -339,3 +339,110 @@ let has_realization = function | @@ -339,3 +339,110 @@ let has_realization = function | ||
339 | 339 | ||
340 | (* let _ = print_entries entries *) | 340 | (* let _ = print_entries entries *) |
341 | *) | 341 | *) |
342 | + | ||
343 | +let selected_phrases = | ||
344 | + File.fold_tab "results/phrases_cp.tab" IntSet.empty (fun set -> function | ||
345 | + [id;_] -> IntSet.add set (int_of_string id) | ||
346 | + | _ -> failwith "selected_phrases") | ||
347 | + | ||
348 | +let print_phrases filename phrases = | ||
349 | + File.file_out filename (fun file -> | ||
350 | + IntMap.iter phrases (fun id morf -> | ||
351 | + Printf.fprintf file "%d\t%s\n" id (ENIAMwalStringOf.morf morf))) | ||
352 | + | ||
353 | +let rec connected_schema schema = | ||
354 | + String.concat "+" (Xlist.map schema (fun s -> | ||
355 | + String.concat "," ( | ||
356 | + (if s.gf = ARG then [] else [ENIAMwalStringOf.gf s.gf])@ | ||
357 | + s.mode@(ENIAMwalStringOf.controllers s.cr)@(ENIAMwalStringOf.controllees s.ce)) ^ | ||
358 | + "{" ^ String.concat ";" (Xlist.map s.morfs ENIAMwalStringOf.morf) ^ "}:" ^ ENIAMwalStringOf.sem_frame s)) | ||
359 | + | ||
360 | +let print_connected filename connected = | ||
361 | + File.file_out filename (fun file -> | ||
362 | + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema,examples*) -> | ||
363 | + Printf.fprintf file "\n\t%d\t%d\t%s: %s: %s: %s: %s: %s: %s: %s:\t%s\n" | ||
364 | + c.sch_id c.frm_id pos lemma | ||
365 | + (ENIAMwalStringOf.opinion c.sopinion) | ||
366 | + (ENIAMwalStringOf.opinion c.fopinion) | ||
367 | + (String.concat "," (Xlist.map c.meanings (fun m -> | ||
368 | + if m.name="" then string_of_int m.mng_id else m.name ^ "-" ^ m.variant))) | ||
369 | + (ENIAMwalStringOf.negation c.negativity) | ||
370 | + (ENIAMwalStringOf.pred c.predicativity) | ||
371 | + (ENIAMwalStringOf.aspect c.aspect) | ||
372 | + (connected_schema c.schema); | ||
373 | + Xlist.iter c.examples (fun (opinion,exm) -> | ||
374 | + Printf.fprintf file "#%s: %s\n" (ENIAMwalStringOf.opinion opinion) exm))) | ||
375 | + | ||
376 | +let expand_morf phrases = function | ||
377 | + | MorfId id -> | ||
378 | + (try IntMap.find phrases id | ||
379 | + with Not_found -> Printf.printf "expand_morf: %d\n" id; MorfId id) | ||
380 | + | _ -> failwith "expand_morf" | ||
381 | + | ||
382 | +let expand_sel_prefs meanings = function | ||
383 | + SynsetId id -> | ||
384 | + (try | ||
385 | + let m = IntMap.find meanings id in | ||
386 | + Predef (m.name ^ "-" ^ m.variant) | ||
387 | + with Not_found -> (*Printf.printf "expand_sel_prefs: %d\n" id;*) SynsetId id) | ||
388 | + | s -> s | ||
389 | + | ||
390 | +let expand_schema phrases meanings_map c = | ||
391 | + let schema = Xlist.map c.schema (fun (s : position) -> | ||
392 | + {s with | ||
393 | + morfs = Xlist.map s.morfs (expand_morf phrases); | ||
394 | + sel_prefs = Xlist.map s.sel_prefs (expand_sel_prefs meanings_map)}) in | ||
395 | + (* let meanings = Xlist.map c.meanings (fun id -> try IntMap.find meanings_map id with Not_found -> {empty_meaning with name=string_of_int id}) in *) | ||
396 | + {c with (*meanings2=meanings;*) schema=schema} | ||
397 | + | ||
398 | +let assign_examples examples c = | ||
399 | + let p_set = Xlist.fold c.schema IntSet.empty (fun p_set p -> | ||
400 | + Xlist.fold p.morfs p_set (fun p_set -> function | ||
401 | + MorfId id -> IntSet.add p_set id | ||
402 | + | _ -> p_set)) in | ||
403 | + let m_set = Xlist.fold c.meanings IntSet.empty (fun m_set m -> IntSet.add m_set m.mng_id) in | ||
404 | + let examples = Xlist.fold examples [] (fun examples (e : example) -> | ||
405 | + let b = Xlist.fold e.phrases false (fun b (sch_id,_,morf_id) -> | ||
406 | + if c.sch_id = sch_id && IntSet.mem p_set morf_id then true else b) in | ||
407 | + if IntSet.mem m_set e.meaning && b then e :: examples else examples) in | ||
408 | + let examples = Xlist.rev_map examples (fun e -> e.opinion,e.sentence) in | ||
409 | + {c with examples=examples} | ||
410 | + | ||
411 | +let select_morfs morfs = | ||
412 | + List.rev (Xlist.fold morfs [] (fun morfs -> function | ||
413 | + MorfId id -> if IntSet.mem selected_phrases id then (MorfId id) :: morfs else morfs | ||
414 | + | _ -> failwith "select_morfs")) | ||
415 | + | ||
416 | +let select_positions schema = | ||
417 | + List.rev (Xlist.fold schema [] (fun schema p -> | ||
418 | + let morfs = select_morfs p.morfs in | ||
419 | + if morfs = [] then schema else | ||
420 | + {p with morfs = morfs} :: schema)) | ||
421 | + | ||
422 | +let select_entries entries = | ||
423 | + Xlist.fold entries [] (fun entries c -> | ||
424 | + let schema = select_positions c.schema in | ||
425 | + if schema = [] then entries else c :: entries) | ||
426 | + | ||
427 | +(* Wypisanie podrzędników zdaniowych *) | ||
428 | +let _ = | ||
429 | + let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in | ||
430 | + print_phrases "results/phrases.tab" phrases; | ||
431 | + let meanings = | ||
432 | + Xlist.fold walenty IntMap.empty (fun meanings entry -> | ||
433 | + Xlist.fold entry.meanings meanings (fun meanings meaning -> | ||
434 | + IntMap.add meanings meaning.mng_id meaning)) in | ||
435 | + let connected_walenty = | ||
436 | + Xlist.fold walenty Entries.empty (fun connected_walenty e -> | ||
437 | + (* print_endline "1"; *) | ||
438 | + let entries = ENIAMwalConnect.connect e in | ||
439 | + (* print_endline "2"; *) | ||
440 | + let entries = select_entries entries in | ||
441 | + (* print_endline "3"; *) | ||
442 | + let entries = Xlist.rev_map entries (assign_examples e.examples) in | ||
443 | + (* print_endline "4"; *) | ||
444 | + let entries = Xlist.rev_map entries (expand_schema phrases meanings) in | ||
445 | + (* print_endline "5"; *) | ||
446 | + Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in | ||
447 | + print_connected "results/connected.tab" connected_walenty; | ||
448 | + () |
walenty/ENIAMwalConnect.ml
@@ -30,7 +30,7 @@ let process_positions positions = | @@ -30,7 +30,7 @@ let process_positions positions = | ||
30 | IntMap.add positions position.psn_id position) | 30 | IntMap.add positions position.psn_id position) |
31 | 31 | ||
32 | let process_schemata schemata = | 32 | let process_schemata schemata = |
33 | - Xlist.fold schemata IntMap.empty (fun schemata schema -> | 33 | + Xlist.fold schemata IntMap.empty (fun schemata (schema : schema) -> |
34 | let atrs = schema.negativity, schema.predicativity, schema.aspect in | 34 | let atrs = schema.negativity, schema.predicativity, schema.aspect in |
35 | let positions = process_positions schema.positions in | 35 | let positions = process_positions schema.positions in |
36 | IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions)) | 36 | IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions)) |
@@ -44,9 +44,9 @@ let process_frames frames = | @@ -44,9 +44,9 @@ let process_frames frames = | ||
44 | let arguments = process_arguments frame.arguments in | 44 | let arguments = process_arguments frame.arguments in |
45 | IntMap.add frames frame.frm_id (frame,arguments)) | 45 | IntMap.add frames frame.frm_id (frame,arguments)) |
46 | 46 | ||
47 | -(* let process_meanings meanings = | 47 | +let process_meanings meanings = |
48 | Xlist.fold meanings IntMap.empty (fun meanings meaning -> | 48 | Xlist.fold meanings IntMap.empty (fun meanings meaning -> |
49 | - IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*)) *) | 49 | + IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*)) |
50 | 50 | ||
51 | let process_sel_pref arguments = function | 51 | let process_sel_pref arguments = function |
52 | SynsetId s -> SynsetId s(*try ENIAMplWordnet.synset_name s with Not_found -> "unknown"*) | 52 | SynsetId s -> SynsetId s(*try ENIAMplWordnet.synset_name s with Not_found -> "unknown"*) |
@@ -59,9 +59,9 @@ let process_sel_pref arguments = function | @@ -59,9 +59,9 @@ let process_sel_pref arguments = function | ||
59 | let connect entry = | 59 | let connect entry = |
60 | let schemata = process_schemata entry.schemata in | 60 | let schemata = process_schemata entry.schemata in |
61 | let frames = process_frames entry.frames in | 61 | let frames = process_frames entry.frames in |
62 | - (* let meanings = process_meanings entry.meanings in *) | 62 | + let meanings = process_meanings entry.meanings in |
63 | Xlist.fold entry.alternations [] (fun found alt -> | 63 | Xlist.fold entry.alternations [] (fun found alt -> |
64 | - let refl,opinion,schema_atrs,positions = IntMap.find schemata alt.schema in | 64 | + let refl,opinion,(n,p,a),positions = IntMap.find schemata alt.schema in |
65 | let frame,arguments = IntMap.find frames alt.frame in | 65 | let frame,arguments = IntMap.find frames alt.frame in |
66 | let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in | 66 | let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in |
67 | let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn -> | 67 | let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn -> |
@@ -75,9 +75,10 @@ let connect entry = | @@ -75,9 +75,10 @@ let connect entry = | ||
75 | with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in | 75 | with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in |
76 | {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs; | 76 | {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs; |
77 | morfs=List.rev morfs} :: conn_positions)) in | 77 | morfs=List.rev morfs} :: conn_positions)) in |
78 | - (* let meanings = List.rev (Xlist.rev_map frame.meanings (fun id -> | ||
79 | - IntMap.find meanings id)) in *) | ||
80 | - (opinion,frame.opinion,frame.meanings,schema_atrs,conn_positions) :: found) | 78 | + let meanings = List.rev (Xlist.rev_map frame.meanings (fun id -> |
79 | + try IntMap.find meanings id with Not_found -> {empty_meaning with mng_id=id})) in | ||
80 | + {sch_id=alt.schema; frm_id=alt.frame; sopinion=opinion; fopinion=frame.opinion; meanings=meanings; | ||
81 | + negativity=n; predicativity=p;aspect=a; schema=conn_positions; examples=[]} :: found) | ||
81 | 82 | ||
82 | let schemata entry = | 83 | let schemata entry = |
83 | let schemata = process_schemata entry.schemata in | 84 | let schemata = process_schemata entry.schemata in |
walenty/ENIAMwalGenerate.ml
@@ -31,40 +31,54 @@ let correct_walenty entry = | @@ -31,40 +31,54 @@ let correct_walenty entry = | ||
31 | else entry | 31 | else entry |
32 | 32 | ||
33 | let load_walenty walenty_filename expands_filename = | 33 | let load_walenty walenty_filename expands_filename = |
34 | + print_endline "load_walenty 1"; | ||
34 | let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in | 35 | let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in |
36 | + print_endline "load_walenty 2"; | ||
35 | let walenty = Xlist.rev_map walenty correct_walenty in | 37 | let walenty = Xlist.rev_map walenty correct_walenty in |
38 | + print_endline "load_walenty 3"; | ||
36 | let expands = ENIAMwalTEI.load_expands expands_filename in | 39 | let expands = ENIAMwalTEI.load_expands expands_filename in |
40 | + print_endline "load_walenty 4"; | ||
37 | let meanings = | 41 | let meanings = |
38 | Xlist.fold walenty IntMap.empty (fun meanings entry -> | 42 | Xlist.fold walenty IntMap.empty (fun meanings entry -> |
39 | Xlist.fold entry.meanings meanings (fun meanings meaning -> | 43 | Xlist.fold entry.meanings meanings (fun meanings meaning -> |
40 | IntMap.add meanings meaning.mng_id meaning)) in | 44 | IntMap.add meanings meaning.mng_id meaning)) in |
45 | + print_endline "load_walenty 5"; | ||
41 | let connected_walenty = | 46 | let connected_walenty = |
42 | Xlist.fold walenty Entries.empty (fun connected_walenty e -> | 47 | Xlist.fold walenty Entries.empty (fun connected_walenty e -> |
43 | let entries = ENIAMwalConnect.connect e in | 48 | let entries = ENIAMwalConnect.connect e in |
44 | Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in | 49 | Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in |
50 | + print_endline "load_walenty 6"; | ||
45 | let schemata_walenty = | 51 | let schemata_walenty = |
46 | Xlist.fold walenty Entries.empty (fun schemata_walenty e -> | 52 | Xlist.fold walenty Entries.empty (fun schemata_walenty e -> |
47 | let entries = ENIAMwalConnect.schemata e in | 53 | let entries = ENIAMwalConnect.schemata e in |
48 | Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in | 54 | Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in |
55 | + print_endline "load_walenty 7"; | ||
49 | let expands,compreps,subtypes,equivs,adv_types = | 56 | let expands,compreps,subtypes,equivs,adv_types = |
50 | ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in | 57 | ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in |
58 | + print_endline "load_walenty 8"; | ||
51 | let phrases = | 59 | let phrases = |
52 | IntMap.map phrases (fun morf -> | 60 | IntMap.map phrases (fun morf -> |
53 | let morf = ENIAMwalRealizations.expand_schema_morf expands morf in | 61 | let morf = ENIAMwalRealizations.expand_schema_morf expands morf in |
54 | let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in | 62 | let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in |
55 | let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in | 63 | let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in |
56 | morf) in | 64 | morf) in |
65 | + print_endline "load_walenty 9"; | ||
57 | let compreps = Xlist.map compreps (fun (lemma,morfs) -> | 66 | let compreps = Xlist.map compreps (fun (lemma,morfs) -> |
58 | lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in | 67 | lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in |
68 | + print_endline "load_walenty 10"; | ||
59 | let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in | 69 | let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in |
70 | + print_endline "load_walenty 11"; | ||
60 | let phrases,entries = | 71 | let phrases,entries = |
61 | IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs -> | 72 | IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs -> |
62 | let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in | 73 | let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in |
63 | let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in | 74 | let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in |
64 | IntMap.add phrases id morfs, entries) in | 75 | IntMap.add phrases id morfs, entries) in |
76 | + print_endline "load_walenty 12"; | ||
65 | let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) -> | 77 | let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) -> |
66 | Entries.add_inc entries pos lemma entry) in | 78 | Entries.add_inc entries pos lemma entry) in |
79 | + print_endline "load_walenty 13"; | ||
67 | let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in | 80 | let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in |
81 | + print_endline "load_walenty 14"; | ||
68 | let entries = Entries.flatten_map entries (fun pos lemma entry -> | 82 | let entries = Entries.flatten_map entries (fun pos lemma entry -> |
69 | ENIAMwalLex.expand_restr [] lemma pos entry) in | 83 | ENIAMwalLex.expand_restr [] lemma pos entry) in |
70 | (* let entries = | 84 | (* let entries = |
@@ -72,6 +86,7 @@ let load_walenty walenty_filename expands_filename = | @@ -72,6 +86,7 @@ let load_walenty walenty_filename expands_filename = | ||
72 | StringMap.mapi entries2 (fun lemma entries3 -> | 86 | StringMap.mapi entries2 (fun lemma entries3 -> |
73 | EntrySet.fold entries3 [] (fun entries3 entry -> | 87 | EntrySet.fold entries3 [] (fun entries3 entry -> |
74 | (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *) | 88 | (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *) |
89 | + print_endline "load_walenty 15"; | ||
75 | connected_walenty, schemata_walenty, phrases, entries, meanings, adv_types | 90 | connected_walenty, schemata_walenty, phrases, entries, meanings, adv_types |
76 | 91 | ||
77 | let print_entries filename entries = | 92 | let print_entries filename entries = |
@@ -97,15 +112,15 @@ let print_schemata filename schemata = | @@ -97,15 +112,15 @@ let print_schemata filename schemata = | ||
97 | 112 | ||
98 | let print_connected filename connected = | 113 | let print_connected filename connected = |
99 | File.file_out filename (fun file -> | 114 | File.file_out filename (fun file -> |
100 | - Entries.iter connected (fun pos lemma (sopinion,fopinion,meanings,(n,p,a),schema) -> | 115 | + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema*) -> |
101 | Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma | 116 | Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma |
102 | - (ENIAMwalStringOf.opinion sopinion) | ||
103 | - (ENIAMwalStringOf.opinion fopinion) | ||
104 | - (String.concat "," (Xlist.map meanings string_of_int)) | ||
105 | - (ENIAMwalStringOf.negation n) | ||
106 | - (ENIAMwalStringOf.pred p) | ||
107 | - (ENIAMwalStringOf.aspect a) | ||
108 | - (ENIAMwalStringOf.connected_schema schema))) | 117 | + (ENIAMwalStringOf.opinion c.sopinion) |
118 | + (ENIAMwalStringOf.opinion c.fopinion) | ||
119 | + (String.concat "," (Xlist.map c.meanings (fun m -> string_of_int m.mng_id))) | ||
120 | + (ENIAMwalStringOf.negation c.negativity) | ||
121 | + (ENIAMwalStringOf.pred c.predicativity) | ||
122 | + (ENIAMwalStringOf.aspect c.aspect) | ||
123 | + (ENIAMwalStringOf.connected_schema c.schema))) | ||
109 | 124 | ||
110 | let split_tokens s = | 125 | let split_tokens s = |
111 | let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function | 126 | let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function |
walenty/ENIAMwalStringOf.ml
@@ -101,6 +101,7 @@ let gf = function | @@ -101,6 +101,7 @@ let gf = function | ||
101 | SUBJ -> "subj" | 101 | SUBJ -> "subj" |
102 | | OBJ -> "obj" | 102 | | OBJ -> "obj" |
103 | | ARG -> "arg"(*""*) | 103 | | ARG -> "arg"(*""*) |
104 | + | HEAD -> "head" | ||
104 | 105 | ||
105 | let pos = function | 106 | let pos = function |
106 | SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")" | 107 | SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")" |
walenty/ENIAMwalTEI.ml
@@ -60,7 +60,8 @@ let rec tei_to_string = function | @@ -60,7 +60,8 @@ let rec tei_to_string = function | ||
60 | | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) | 60 | | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) |
61 | | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) | 61 | | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) |
62 | | Id id -> Printf.sprintf "Id(%s)" (string_of_id id) | 62 | | Id id -> Printf.sprintf "Id(%s)" (string_of_id id) |
63 | - | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s | 63 | + (* | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s *) |
64 | + | SameAs(id,s) -> Printf.sprintf "SameAs(%s,%s)" (string_of_id id) s | ||
64 | 65 | ||
65 | let rec parse_tei = function | 66 | let rec parse_tei = function |
66 | Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) -> | 67 | Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) -> |
@@ -82,6 +83,7 @@ let rec parse_tei = function | @@ -82,6 +83,7 @@ let rec parse_tei = function | ||
82 | let parse_gf = function | 83 | let parse_gf = function |
83 | "subj" -> SUBJ | 84 | "subj" -> SUBJ |
84 | | "obj" -> OBJ | 85 | | "obj" -> OBJ |
86 | + | "head" -> HEAD | ||
85 | | s -> failwith ("parse_gf: " ^ s) | 87 | | s -> failwith ("parse_gf: " ^ s) |
86 | 88 | ||
87 | let parse_control arg = function | 89 | let parse_control arg = function |
@@ -128,6 +130,7 @@ let parse_number = function | @@ -128,6 +130,7 @@ let parse_number = function | ||
128 | 130 | ||
129 | let parse_gender = function | 131 | let parse_gender = function |
130 | "m1" -> Gender "m1" | 132 | "m1" -> Gender "m1" |
133 | + | "m2" -> Gender "m2" | ||
131 | | "m3" -> Gender "m3" | 134 | | "m3" -> Gender "m3" |
132 | | "n" -> Gender "n"(*Genders["n1";"n2"]*) | 135 | | "n" -> Gender "n"(*Genders["n1";"n2"]*) |
133 | | "f" -> Gender "f" | 136 | | "f" -> Gender "f" |
@@ -136,6 +139,21 @@ let parse_gender = function | @@ -136,6 +139,21 @@ let parse_gender = function | ||
136 | | "agr" -> GenderAgr | 139 | | "agr" -> GenderAgr |
137 | | s -> failwith ("parse_gender: " ^ s) | 140 | | s -> failwith ("parse_gender: " ^ s) |
138 | 141 | ||
142 | +let parse_genders = function | ||
143 | + [Symbol "agr"] -> GenderAgr | ||
144 | + | genders -> | ||
145 | + let genders = Xlist.map genders (function | ||
146 | + Symbol "m1" -> "m1" | ||
147 | + | Symbol "m2" -> "m2" | ||
148 | + | Symbol "m3" -> "m3" | ||
149 | + | Symbol "n" -> "n" | ||
150 | + | Symbol "f" -> "f" | ||
151 | + | s -> failwith ("parse_genders: " ^ tei_to_string s)) in | ||
152 | + (match genders with | ||
153 | + [g] -> Gender g | ||
154 | + | [] -> failwith "parse_genders: empty" | ||
155 | + | _ -> Genders genders) | ||
156 | + | ||
139 | let parse_grad = function | 157 | let parse_grad = function |
140 | "pos" -> Grad "pos" | 158 | "pos" -> Grad "pos" |
141 | | "com" -> Grad "com" | 159 | | "com" -> Grad "com" |
@@ -310,7 +328,7 @@ and load_lex arg xml = match xml with | @@ -310,7 +328,7 @@ and load_lex arg xml = match xml with | ||
310 | | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue} | 328 | | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue} |
311 | | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse} | 329 | | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse} |
312 | | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty} | 330 | | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty} |
313 | - | Fset("gender",[Symbol value]) -> {arg with lex_gender = parse_gender value} | 331 | + | Fset("gender",genders) -> {arg with lex_gender = parse_genders genders} |
314 | | xml -> | 332 | | xml -> |
315 | Printf.printf "%s\n" (tei_to_string xml); | 333 | Printf.printf "%s\n" (tei_to_string xml); |
316 | failwith "load_lex:\n " | 334 | failwith "load_lex:\n " |
@@ -436,6 +454,8 @@ let load_phrases_set ent = function | @@ -436,6 +454,8 @@ let load_phrases_set ent = function | ||
436 | let load_example_info ent arg = function | 454 | let load_example_info ent arg = function |
437 | | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) -> | 455 | | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) -> |
438 | if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id) | 456 | if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id) |
457 | + | F("meaning",SameAs({hash=true; numbers=[id]; suffix="mng"},"lexical_unit")) -> | ||
458 | + {arg with meaning = id} | ||
439 | | Fset("phrases",phrases_set) -> | 459 | | Fset("phrases",phrases_set) -> |
440 | {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))} | 460 | {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))} |
441 | | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string} | 461 | | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string} |
@@ -456,8 +476,8 @@ let load_example ent = function | @@ -456,8 +476,8 @@ let load_example ent = function | ||
456 | let load_self_prefs_sets name ent frm = function | 476 | let load_self_prefs_sets name ent frm = function |
457 | | Numeric value -> if name = "synsets" then SynsetId value else failwith "load_self_prefs_sets" | 477 | | Numeric value -> if name = "synsets" then SynsetId value else failwith "load_self_prefs_sets" |
458 | | Symbol value -> if name = "predefs" then Predef value else failwith "load_self_prefs_sets" | 478 | | Symbol value -> if name = "predefs" then Predef value else failwith "load_self_prefs_sets" |
459 | - | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"}, "argument"))]) -> | ||
460 | - if ent_id <> ent || frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d %d" ent ent_id) | 479 | + | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"}, "argument"))]) -> |
480 | + if (*ent_id <> ent ||*) frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d" ent (*ent_id*)) | ||
461 | else RelationArgId(value,arg_id) | 481 | else RelationArgId(value,arg_id) |
462 | | xml -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string xml) | 482 | | xml -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string xml) |
463 | 483 | ||
@@ -472,9 +492,9 @@ let load_argument_info ent frm arg = function | @@ -472,9 +492,9 @@ let load_argument_info ent frm arg = function | ||
472 | | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) -> | 492 | | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) -> |
473 | {arg with sel_prefs = List.flatten (List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent frm)))} | 493 | {arg with sel_prefs = List.flatten (List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent frm)))} |
474 | (* | Id id -> {arg with arg_id = id} *) | 494 | (* | Id id -> {arg with arg_id = id} *) |
475 | - | Id{hash=false; numbers=[ent_id;frm_id;id]; suffix="arg"} -> | ||
476 | - if ent_id = ent && frm_id = frm then {arg with arg_id = id} | ||
477 | - else failwith (Printf.sprintf "load_argument_info %d %d" ent ent_id) | 495 | + | Id{hash=false; numbers=[(*ent_id;*)frm_id;id]; suffix="arg"} -> |
496 | + if (*ent_id = ent &&*) frm_id = frm then {arg with arg_id = id} | ||
497 | + else failwith (Printf.sprintf "load_argument_info %d" ent (*ent_id*)) | ||
478 | | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml) | 498 | | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml) |
479 | 499 | ||
480 | let load_arguments_set ent frm = function | 500 | let load_arguments_set ent frm = function |
@@ -485,21 +505,26 @@ let load_arguments_set ent frm = function | @@ -485,21 +505,26 @@ let load_arguments_set ent frm = function | ||
485 | | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml) | 505 | | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml) |
486 | 506 | ||
487 | let load_meanings_set ent = function | 507 | let load_meanings_set ent = function |
488 | - | SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit") -> | ||
489 | - if ent_id = ent then id else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id) | 508 | + | SameAs({hash=true; numbers=[(*ent_id;*)id]; suffix="mng"},"lexical_unit") -> |
509 | + (*if ent_id = ent then*) id (*else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id)*) | ||
490 | | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml) | 510 | | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml) |
491 | 511 | ||
492 | let load_frame ent = function | 512 | let load_frame ent = function |
493 | | Fs("frame",[ | 513 | | Fs("frame",[ |
494 | - Id{hash=false; numbers=[ent_id;id]; suffix="frm"}; | 514 | + Id{hash=false; numbers=[(*ent_id;*)id]; suffix="frm"}; |
495 | F("opinion",Symbol opinion); | 515 | F("opinion",Symbol opinion); |
496 | Fset("meanings",meanings_set); | 516 | Fset("meanings",meanings_set); |
497 | Fset("arguments",arguments_set)]) -> | 517 | Fset("arguments",arguments_set)]) -> |
498 | - if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else | 518 | + (*if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else*) |
519 | + (* Printf.printf "Frame IN %d\n" id; *) | ||
499 | {frm_id = id; | 520 | {frm_id = id; |
500 | opinion = parse_opinion opinion; | 521 | opinion = parse_opinion opinion; |
501 | meanings = List.rev (Xlist.rev_map meanings_set (load_meanings_set ent)); | 522 | meanings = List.rev (Xlist.rev_map meanings_set (load_meanings_set ent)); |
502 | arguments = List.rev (Xlist.rev_map arguments_set (load_arguments_set ent id))} | 523 | arguments = List.rev (Xlist.rev_map arguments_set (load_arguments_set ent id))} |
524 | + | SameAs({hash=true; numbers=[id]; suffix="frm"},frame) -> (* FIXME !! *) | ||
525 | + (* (try IntMap.find frames id with Not_found -> failwith ("load_frame: ^ " ^ string_of_int id)) *) | ||
526 | + (* Printf.printf "Frame OUT %d\n" id; *) | ||
527 | + {frm_id=(-id); opinion=Nieokreslony; meanings=[]; arguments=[]} | ||
503 | | xml -> failwith ("load_frame :\n " ^ tei_to_string xml) | 528 | | xml -> failwith ("load_frame :\n " ^ tei_to_string xml) |
504 | 529 | ||
505 | let load_meaning_info ent arg = function | 530 | let load_meaning_info ent arg = function |
@@ -507,7 +532,8 @@ let load_meaning_info ent arg = function | @@ -507,7 +532,8 @@ let load_meaning_info ent arg = function | ||
507 | | F("variant",TEIstring variant_string) -> {arg with variant = variant_string} | 532 | | F("variant",TEIstring variant_string) -> {arg with variant = variant_string} |
508 | | F("plwnluid",Numeric value) -> {arg with plwnluid = value} | 533 | | F("plwnluid",Numeric value) -> {arg with plwnluid = value} |
509 | | F("gloss",TEIstring gloss_string) -> {arg with gloss = gloss_string} | 534 | | F("gloss",TEIstring gloss_string) -> {arg with gloss = gloss_string} |
510 | - | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id) | 535 | + (* | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id) *) |
536 | + | Id{hash=false; numbers=[id]; suffix="mng"} -> {arg with mng_id = id} | ||
511 | | xml -> failwith ("load_meaning_info:\n " ^ tei_to_string xml) | 537 | | xml -> failwith ("load_meaning_info:\n " ^ tei_to_string xml) |
512 | 538 | ||
513 | 539 | ||
@@ -518,9 +544,9 @@ let load_meaning ent = function | @@ -518,9 +544,9 @@ let load_meaning ent = function | ||
518 | 544 | ||
519 | let load_alter_connection ent = function | 545 | let load_alter_connection ent = function |
520 | | Fs("connection", [ | 546 | | Fs("connection", [ |
521 | - F("argument",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"},"argument")); | 547 | + F("argument",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"},"argument")); |
522 | Fset("phrases",phrases)]) -> | 548 | Fset("phrases",phrases)]) -> |
523 | - if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else | 549 | + (* if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else *) |
524 | let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase -> | 550 | let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase -> |
525 | let sch_id,psn_id,phr_id = load_phrases_set ent phrase in | 551 | let sch_id,psn_id,phr_id = load_phrases_set ent phrase in |
526 | IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l), | 552 | IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l), |
@@ -528,7 +554,7 @@ let load_alter_connection ent = function | @@ -528,7 +554,7 @@ let load_alter_connection ent = function | ||
528 | if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else | 554 | if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else |
529 | IntSet.min_elt sch_set, frm_id, | 555 | IntSet.min_elt sch_set, frm_id, |
530 | {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)} | 556 | {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)} |
531 | - | xml -> failwith ("load_alter_connections: \n " ^ tei_to_string xml) | 557 | + | xml -> failwith ("load_alter_connection: \n " ^ tei_to_string xml) |
532 | 558 | ||
533 | let load_alternations ent = function | 559 | let load_alternations ent = function |
534 | | Fs("alternation",[Fset("connections",connections_set)]) -> | 560 | | Fs("alternation",[Fset("connections",connections_set)]) -> |
@@ -561,6 +587,17 @@ let load_entry phrases = function | @@ -561,6 +587,17 @@ let load_entry phrases = function | ||
561 | | xml -> failwith ("load_entry: \n" ^ tei_to_string xml))) | 587 | | xml -> failwith ("load_entry: \n" ^ tei_to_string xml))) |
562 | | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml) | 588 | | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml) |
563 | 589 | ||
590 | +let add_known_frames known_frames e = | ||
591 | + Xlist.fold e.frames known_frames (fun known_frames f -> | ||
592 | + if f.frm_id < 0 then known_frames else IntMap.add known_frames f.frm_id f) | ||
593 | + | ||
594 | +let expand_frames known_frames e = | ||
595 | + {e with frames = | ||
596 | + List.rev (Xlist.rev_map e.frames (fun f -> | ||
597 | + if f.frm_id < 0 then | ||
598 | + try IntMap.find known_frames (-f.frm_id) with Not_found -> failwith "expand_frames" | ||
599 | + else f))} | ||
600 | + | ||
564 | let load_walenty filename = | 601 | let load_walenty filename = |
565 | begin | 602 | begin |
566 | match Xml.parse_file filename with | 603 | match Xml.parse_file filename with |
@@ -568,7 +605,9 @@ let load_walenty filename = | @@ -568,7 +605,9 @@ let load_walenty filename = | ||
568 | [Xml.Element("teiHeader",_,_) ; | 605 | [Xml.Element("teiHeader",_,_) ; |
569 | Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> | 606 | Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> |
570 | let phrases = ref IntMap.empty in | 607 | let phrases = ref IntMap.empty in |
571 | - let walenty = List.rev (Xlist.rev_map entries (load_entry phrases)) in | 608 | + let walenty = Xlist.rev_map entries (load_entry phrases) in |
609 | + let known_frames = Xlist.fold walenty IntMap.empty add_known_frames in | ||
610 | + let walenty = Xlist.rev_map walenty (expand_frames known_frames) in | ||
572 | walenty, !phrases | 611 | walenty, !phrases |
573 | | _ -> failwith "load_walenty" | 612 | | _ -> failwith "load_walenty" |
574 | end | 613 | end |
walenty/ENIAMwalTypes.ml
@@ -33,7 +33,7 @@ type grad = Grad of string | GradUndef | @@ -33,7 +33,7 @@ type grad = Grad of string | GradUndef | ||
33 | type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef | 33 | type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef |
34 | (* type acm = Acm of string | AcmUndef *) | 34 | (* type acm = Acm of string | AcmUndef *) |
35 | 35 | ||
36 | -type gf = SUBJ | OBJ | ARG | 36 | +type gf = SUBJ | OBJ | ARG | HEAD (* FIXME *) |
37 | 37 | ||
38 | type pos = | 38 | type pos = |
39 | SUBST of number * case | 39 | SUBST of number * case |
@@ -219,3 +219,14 @@ type entry = {ent_id: int; | @@ -219,3 +219,14 @@ type entry = {ent_id: int; | ||
219 | 219 | ||
220 | let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[]; | 220 | let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[]; |
221 | frames=[]; meanings=[]; alternations=[]} | 221 | frames=[]; meanings=[]; alternations=[]} |
222 | + | ||
223 | +type connected = {sch_id: int; | ||
224 | + frm_id: int; | ||
225 | + sopinion: opinion; | ||
226 | + fopinion: opinion; | ||
227 | + meanings: meaning list; | ||
228 | + negativity: negation; | ||
229 | + predicativity: pred; | ||
230 | + aspect: aspect; | ||
231 | + schema: position list; | ||
232 | + examples: (opinion * string) list} |
walenty/README
@@ -32,8 +32,8 @@ make clean | @@ -32,8 +32,8 @@ make clean | ||
32 | 32 | ||
33 | Credits | 33 | Credits |
34 | ------- | 34 | ------- |
35 | -Copyright © 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | ||
36 | -Copyright © 2016-2017 Institute of Computer Science Polish Academy of Sciences | 35 | +Copyright © 2016-2018 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> |
36 | +Copyright © 2016-2018 Institute of Computer Science Polish Academy of Sciences | ||
37 | 37 | ||
38 | Licence | 38 | Licence |
39 | ------- | 39 | ------- |
walenty/resources/README
@@ -3,7 +3,7 @@ Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty) | @@ -3,7 +3,7 @@ Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty) | ||
3 | 3 | ||
4 | Walenty is licensed under the following license: | 4 | Walenty is licensed under the following license: |
5 | 5 | ||
6 | -(C) Copyright 2012–2017 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN) | 6 | +(C) Copyright 2012–2018 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN) |
7 | This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/ | 7 | This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/ |
8 | Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains 90326 schemata and 17920 frames for 16044 lemmata. | 8 | Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains 90326 schemata and 17920 frames for 16044 lemmata. |
9 | The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk. | 9 | The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk. |