Commit 4afde66aa00c8ecb39f904e7d806e7832f12d68e

Authored by Wojciech Jaworski
1 parent 44e8be87

Dostosowanie do nowego Walentego

LCGlexicon/ENIAMcategoriesPL.ml
@@ -37,7 +37,7 @@ let selector_values = Xlist.fold [ @@ -37,7 +37,7 @@ let selector_values = Xlist.fold [
37 "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; 37 "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum";
38 "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; 38 "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja";
39 "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; 39 "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt";
40 - "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; 40 + "imps";"pred";"aglt";"inf";"pcon";"pant";"pacta";"qub";"part";"comp";"conj";"interj";
41 "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; 41 "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"];
42 Pos2, []; 42 Pos2, [];
43 Cat, []; 43 Cat, [];
@@ -413,6 +413,7 @@ let clarify_categories proper cat coerced (*snode*) = function @@ -413,6 +413,7 @@ let clarify_categories proper cat coerced (*snode*) = function
413 | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] 413 | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
414 | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] 414 | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
415 | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] 415 | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
  416 + | lemma,"pacta",[] -> [{empty_cats with lemma=lemma; pos="pacta"; pos2="verb"; cat=cat; coerced=coerced; snode=snode}]
416 | lemma,"qub",[] -> 417 | lemma,"qub",[] ->
417 if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}] 418 if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}]
418 else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}] 419 else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}]
@@ -662,6 +663,7 @@ let pos_categories = Xlist.fold [ @@ -662,6 +663,7 @@ let pos_categories = Xlist.fold [
662 "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; 663 "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
663 "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; 664 "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
664 "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; 665 "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
  666 + "pacta",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;];
665 "qub",[Lemma;Cat;Role;SNode;]; 667 "qub",[Lemma;Cat;Role;SNode;];
666 "part",[Lemma;SNode]; 668 "part",[Lemma;SNode];
667 "comp",[Lemma;SNode;];(* ctype *) 669 "comp",[Lemma;SNode;];(* ctype *)
LCGparser/ENIAM_LCGlatexOf.ml
@@ -213,7 +213,7 @@ let chart page text_fragments g = @@ -213,7 +213,7 @@ let chart page text_fragments g =
213 String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes -> 213 String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes ->
214 IntMap.fold nodes l (fun l node1 contents -> 214 IntMap.fold nodes l (fun l node1 contents ->
215 Xlist.fold contents l (fun l (node2,symbol,sem) -> 215 Xlist.fold contents l (fun l (node2,symbol,sem) ->
216 - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in 216 + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
217 (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^ 217 (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
218 "\\end{longtable}" 218 "\\end{longtable}"
219 219
@@ -221,7 +221,7 @@ let chart2 page text_fragments g = @@ -221,7 +221,7 @@ let chart2 page text_fragments g =
221 let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in 221 let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in
222 "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^ 222 "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^
223 String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> 223 String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) ->
224 - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in 224 + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
225 (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ 225 (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^
226 "\\end{longtable}" 226 "\\end{longtable}"
227 227
semantics/ENIAMsemGraph.ml
@@ -570,7 +570,8 @@ let rec reduce_tree = function @@ -570,7 +570,8 @@ let rec reduce_tree = function
570 (match reduce_tree c with 570 (match reduce_tree c with
571 Context c -> 571 Context c ->
572 let t,args = extract_aroles {t with arole=""} c.cx_contents in 572 let t,args = extract_aroles {t with arole=""} c.cx_contents in
573 - make_relation t (Context {c with cx_contents=args}) 573 + (*make_relation t (Context {c with cx_contents=args})*) (* FIXME: to trzeba poprawić tak by działało w obu wersjach parserów *)
  574 + Relation(t.role,"",Context {c with cx_contents=args})
574 | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c)))) 575 | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c))))
575 | c -> ManageCoordination(t,c)) 576 | c -> ManageCoordination(t,c))
576 | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree)) 577 | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree))
subsyntax/ENIAMsubsyntax.ml
@@ -325,7 +325,7 @@ let parse query = @@ -325,7 +325,7 @@ let parse query =
325 let paths,_ = ENIAM_MWE.process paths in 325 let paths,_ = ENIAM_MWE.process paths in
326 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a12"; *) 326 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a12"; *)
327 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) 327 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
328 - let paths = List.rev (Xlist.rev_map paths find_proper_names) in 328 + let paths = if !recognize_proper_names then List.rev (Xlist.rev_map paths find_proper_names) else paths in
329 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a13"; *) 329 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a13"; *)
330 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) 330 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
331 let paths = modify_weights paths in 331 let paths = modify_weights paths in
subsyntax/ENIAMsubsyntaxTypes.ml
@@ -46,6 +46,7 @@ type text = @@ -46,6 +46,7 @@ type text =
46 | AltText of (mode * text) list 46 | AltText of (mode * text) list
47 47
48 let strong_disambiguate_flag = ref false 48 let strong_disambiguate_flag = ref false
  49 +let recognize_proper_names = ref true
49 50
50 let data_path = 51 let data_path =
51 try Sys.getenv "ENIAM_USER_DATA_PATH" 52 try Sys.getenv "ENIAM_USER_DATA_PATH"
subsyntax/interface.ml
@@ -43,6 +43,8 @@ let spec_list = [ @@ -43,6 +43,8 @@ let spec_list = [
43 "--no-internet-mode", Arg.Unit (fun () -> ENIAMtokenizerTypes.internet_mode:=false), "Strict attitude towards interpunction (default)"; 43 "--no-internet-mode", Arg.Unit (fun () -> ENIAMtokenizerTypes.internet_mode:=false), "Strict attitude towards interpunction (default)";
44 "--par-names", Arg.Unit (fun () -> par_names:=true), "Identifiers of paragraphs provided"; 44 "--par-names", Arg.Unit (fun () -> par_names:=true), "Identifiers of paragraphs provided";
45 "--no-par-names", Arg.Unit (fun () -> par_names:=false), "No identifiers of paragraphs provided (default)"; 45 "--no-par-names", Arg.Unit (fun () -> par_names:=false), "No identifiers of paragraphs provided (default)";
  46 + "--proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=true), "Recognize proper names (default)";
  47 + "--no-proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=false), "Do not recognize proper names";
46 ] 48 ]
47 49
48 let usage_msg = 50 let usage_msg =
walenty/ENIAMwalAnalyze.ml
@@ -93,7 +93,7 @@ let walenty_filename,expands_filename = @@ -93,7 +93,7 @@ let walenty_filename,expands_filename =
93 (* "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml", 93 (* "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml",
94 "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" *) 94 "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" *)
95 95
96 -let _ = 96 +(*let _ =
97 let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in 97 let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
98 let walenty = Xlist.rev_map walenty correct_walenty in 98 let walenty = Xlist.rev_map walenty correct_walenty in
99 let expands = ENIAMwalTEI.load_expands expands_filename in 99 let expands = ENIAMwalTEI.load_expands expands_filename in
@@ -126,7 +126,7 @@ let _ = @@ -126,7 +126,7 @@ let _ =
126 File.file_out "results/controll.tab" (fun file -> 126 File.file_out "results/controll.tab" (fun file ->
127 StringMap.iter cmap (fun s l -> 127 StringMap.iter cmap (fun s l ->
128 Printf.fprintf file "%d\t%s\t%s\n" (Xlist.size l) s (String.concat " " l))); 128 Printf.fprintf file "%d\t%s\t%s\n" (Xlist.size l) s (String.concat " " l)));
129 - () 129 + ()*)
130 130
131 (* Test unikalności indeksów sensów *) 131 (* Test unikalności indeksów sensów *)
132 (* let _ = 132 (* let _ =
@@ -339,3 +339,110 @@ let has_realization = function @@ -339,3 +339,110 @@ let has_realization = function
339 339
340 (* let _ = print_entries entries *) 340 (* let _ = print_entries entries *)
341 *) 341 *)
  342 +
  343 +let selected_phrases =
  344 + File.fold_tab "results/phrases_cp.tab" IntSet.empty (fun set -> function
  345 + [id;_] -> IntSet.add set (int_of_string id)
  346 + | _ -> failwith "selected_phrases")
  347 +
  348 +let print_phrases filename phrases =
  349 + File.file_out filename (fun file ->
  350 + IntMap.iter phrases (fun id morf ->
  351 + Printf.fprintf file "%d\t%s\n" id (ENIAMwalStringOf.morf morf)))
  352 +
  353 +let rec connected_schema schema =
  354 + String.concat "+" (Xlist.map schema (fun s ->
  355 + String.concat "," (
  356 + (if s.gf = ARG then [] else [ENIAMwalStringOf.gf s.gf])@
  357 + s.mode@(ENIAMwalStringOf.controllers s.cr)@(ENIAMwalStringOf.controllees s.ce)) ^
  358 + "{" ^ String.concat ";" (Xlist.map s.morfs ENIAMwalStringOf.morf) ^ "}:" ^ ENIAMwalStringOf.sem_frame s))
  359 +
  360 +let print_connected filename connected =
  361 + File.file_out filename (fun file ->
  362 + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema,examples*) ->
  363 + Printf.fprintf file "\n\t%d\t%d\t%s: %s: %s: %s: %s: %s: %s: %s:\t%s\n"
  364 + c.sch_id c.frm_id pos lemma
  365 + (ENIAMwalStringOf.opinion c.sopinion)
  366 + (ENIAMwalStringOf.opinion c.fopinion)
  367 + (String.concat "," (Xlist.map c.meanings (fun m ->
  368 + if m.name="" then string_of_int m.mng_id else m.name ^ "-" ^ m.variant)))
  369 + (ENIAMwalStringOf.negation c.negativity)
  370 + (ENIAMwalStringOf.pred c.predicativity)
  371 + (ENIAMwalStringOf.aspect c.aspect)
  372 + (connected_schema c.schema);
  373 + Xlist.iter c.examples (fun (opinion,exm) ->
  374 + Printf.fprintf file "#%s: %s\n" (ENIAMwalStringOf.opinion opinion) exm)))
  375 +
  376 +let expand_morf phrases = function
  377 + | MorfId id ->
  378 + (try IntMap.find phrases id
  379 + with Not_found -> Printf.printf "expand_morf: %d\n" id; MorfId id)
  380 + | _ -> failwith "expand_morf"
  381 +
  382 +let expand_sel_prefs meanings = function
  383 + SynsetId id ->
  384 + (try
  385 + let m = IntMap.find meanings id in
  386 + Predef (m.name ^ "-" ^ m.variant)
  387 + with Not_found -> (*Printf.printf "expand_sel_prefs: %d\n" id;*) SynsetId id)
  388 + | s -> s
  389 +
  390 +let expand_schema phrases meanings_map c =
  391 + let schema = Xlist.map c.schema (fun (s : position) ->
  392 + {s with
  393 + morfs = Xlist.map s.morfs (expand_morf phrases);
  394 + sel_prefs = Xlist.map s.sel_prefs (expand_sel_prefs meanings_map)}) in
  395 + (* let meanings = Xlist.map c.meanings (fun id -> try IntMap.find meanings_map id with Not_found -> {empty_meaning with name=string_of_int id}) in *)
  396 + {c with (*meanings2=meanings;*) schema=schema}
  397 +
  398 +let assign_examples examples c =
  399 + let p_set = Xlist.fold c.schema IntSet.empty (fun p_set p ->
  400 + Xlist.fold p.morfs p_set (fun p_set -> function
  401 + MorfId id -> IntSet.add p_set id
  402 + | _ -> p_set)) in
  403 + let m_set = Xlist.fold c.meanings IntSet.empty (fun m_set m -> IntSet.add m_set m.mng_id) in
  404 + let examples = Xlist.fold examples [] (fun examples (e : example) ->
  405 + let b = Xlist.fold e.phrases false (fun b (sch_id,_,morf_id) ->
  406 + if c.sch_id = sch_id && IntSet.mem p_set morf_id then true else b) in
  407 + if IntSet.mem m_set e.meaning && b then e :: examples else examples) in
  408 + let examples = Xlist.rev_map examples (fun e -> e.opinion,e.sentence) in
  409 + {c with examples=examples}
  410 +
  411 +let select_morfs morfs =
  412 + List.rev (Xlist.fold morfs [] (fun morfs -> function
  413 + MorfId id -> if IntSet.mem selected_phrases id then (MorfId id) :: morfs else morfs
  414 + | _ -> failwith "select_morfs"))
  415 +
  416 +let select_positions schema =
  417 + List.rev (Xlist.fold schema [] (fun schema p ->
  418 + let morfs = select_morfs p.morfs in
  419 + if morfs = [] then schema else
  420 + {p with morfs = morfs} :: schema))
  421 +
  422 +let select_entries entries =
  423 + Xlist.fold entries [] (fun entries c ->
  424 + let schema = select_positions c.schema in
  425 + if schema = [] then entries else c :: entries)
  426 +
  427 +(* Wypisanie podrzędników zdaniowych *)
  428 +let _ =
  429 + let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
  430 + print_phrases "results/phrases.tab" phrases;
  431 + let meanings =
  432 + Xlist.fold walenty IntMap.empty (fun meanings entry ->
  433 + Xlist.fold entry.meanings meanings (fun meanings meaning ->
  434 + IntMap.add meanings meaning.mng_id meaning)) in
  435 + let connected_walenty =
  436 + Xlist.fold walenty Entries.empty (fun connected_walenty e ->
  437 + (* print_endline "1"; *)
  438 + let entries = ENIAMwalConnect.connect e in
  439 + (* print_endline "2"; *)
  440 + let entries = select_entries entries in
  441 + (* print_endline "3"; *)
  442 + let entries = Xlist.rev_map entries (assign_examples e.examples) in
  443 + (* print_endline "4"; *)
  444 + let entries = Xlist.rev_map entries (expand_schema phrases meanings) in
  445 + (* print_endline "5"; *)
  446 + Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in
  447 + print_connected "results/connected.tab" connected_walenty;
  448 + ()
walenty/ENIAMwalConnect.ml
@@ -30,7 +30,7 @@ let process_positions positions = @@ -30,7 +30,7 @@ let process_positions positions =
30 IntMap.add positions position.psn_id position) 30 IntMap.add positions position.psn_id position)
31 31
32 let process_schemata schemata = 32 let process_schemata schemata =
33 - Xlist.fold schemata IntMap.empty (fun schemata schema -> 33 + Xlist.fold schemata IntMap.empty (fun schemata (schema : schema) ->
34 let atrs = schema.negativity, schema.predicativity, schema.aspect in 34 let atrs = schema.negativity, schema.predicativity, schema.aspect in
35 let positions = process_positions schema.positions in 35 let positions = process_positions schema.positions in
36 IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions)) 36 IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions))
@@ -44,9 +44,9 @@ let process_frames frames = @@ -44,9 +44,9 @@ let process_frames frames =
44 let arguments = process_arguments frame.arguments in 44 let arguments = process_arguments frame.arguments in
45 IntMap.add frames frame.frm_id (frame,arguments)) 45 IntMap.add frames frame.frm_id (frame,arguments))
46 46
47 -(* let process_meanings meanings = 47 +let process_meanings meanings =
48 Xlist.fold meanings IntMap.empty (fun meanings meaning -> 48 Xlist.fold meanings IntMap.empty (fun meanings meaning ->
49 - IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*)) *) 49 + IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*))
50 50
51 let process_sel_pref arguments = function 51 let process_sel_pref arguments = function
52 SynsetId s -> SynsetId s(*try ENIAMplWordnet.synset_name s with Not_found -> "unknown"*) 52 SynsetId s -> SynsetId s(*try ENIAMplWordnet.synset_name s with Not_found -> "unknown"*)
@@ -59,9 +59,9 @@ let process_sel_pref arguments = function @@ -59,9 +59,9 @@ let process_sel_pref arguments = function
59 let connect entry = 59 let connect entry =
60 let schemata = process_schemata entry.schemata in 60 let schemata = process_schemata entry.schemata in
61 let frames = process_frames entry.frames in 61 let frames = process_frames entry.frames in
62 - (* let meanings = process_meanings entry.meanings in *) 62 + let meanings = process_meanings entry.meanings in
63 Xlist.fold entry.alternations [] (fun found alt -> 63 Xlist.fold entry.alternations [] (fun found alt ->
64 - let refl,opinion,schema_atrs,positions = IntMap.find schemata alt.schema in 64 + let refl,opinion,(n,p,a),positions = IntMap.find schemata alt.schema in
65 let frame,arguments = IntMap.find frames alt.frame in 65 let frame,arguments = IntMap.find frames alt.frame in
66 let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in 66 let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in
67 let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn -> 67 let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn ->
@@ -75,9 +75,10 @@ let connect entry = @@ -75,9 +75,10 @@ let connect entry =
75 with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in 75 with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in
76 {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs; 76 {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs;
77 morfs=List.rev morfs} :: conn_positions)) in 77 morfs=List.rev morfs} :: conn_positions)) in
78 - (* let meanings = List.rev (Xlist.rev_map frame.meanings (fun id ->  
79 - IntMap.find meanings id)) in *)  
80 - (opinion,frame.opinion,frame.meanings,schema_atrs,conn_positions) :: found) 78 + let meanings = List.rev (Xlist.rev_map frame.meanings (fun id ->
  79 + try IntMap.find meanings id with Not_found -> {empty_meaning with mng_id=id})) in
  80 + {sch_id=alt.schema; frm_id=alt.frame; sopinion=opinion; fopinion=frame.opinion; meanings=meanings;
  81 + negativity=n; predicativity=p;aspect=a; schema=conn_positions; examples=[]} :: found)
81 82
82 let schemata entry = 83 let schemata entry =
83 let schemata = process_schemata entry.schemata in 84 let schemata = process_schemata entry.schemata in
walenty/ENIAMwalGenerate.ml
@@ -31,40 +31,54 @@ let correct_walenty entry = @@ -31,40 +31,54 @@ let correct_walenty entry =
31 else entry 31 else entry
32 32
33 let load_walenty walenty_filename expands_filename = 33 let load_walenty walenty_filename expands_filename =
  34 + print_endline "load_walenty 1";
34 let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in 35 let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
  36 + print_endline "load_walenty 2";
35 let walenty = Xlist.rev_map walenty correct_walenty in 37 let walenty = Xlist.rev_map walenty correct_walenty in
  38 + print_endline "load_walenty 3";
36 let expands = ENIAMwalTEI.load_expands expands_filename in 39 let expands = ENIAMwalTEI.load_expands expands_filename in
  40 + print_endline "load_walenty 4";
37 let meanings = 41 let meanings =
38 Xlist.fold walenty IntMap.empty (fun meanings entry -> 42 Xlist.fold walenty IntMap.empty (fun meanings entry ->
39 Xlist.fold entry.meanings meanings (fun meanings meaning -> 43 Xlist.fold entry.meanings meanings (fun meanings meaning ->
40 IntMap.add meanings meaning.mng_id meaning)) in 44 IntMap.add meanings meaning.mng_id meaning)) in
  45 + print_endline "load_walenty 5";
41 let connected_walenty = 46 let connected_walenty =
42 Xlist.fold walenty Entries.empty (fun connected_walenty e -> 47 Xlist.fold walenty Entries.empty (fun connected_walenty e ->
43 let entries = ENIAMwalConnect.connect e in 48 let entries = ENIAMwalConnect.connect e in
44 Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in 49 Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in
  50 + print_endline "load_walenty 6";
45 let schemata_walenty = 51 let schemata_walenty =
46 Xlist.fold walenty Entries.empty (fun schemata_walenty e -> 52 Xlist.fold walenty Entries.empty (fun schemata_walenty e ->
47 let entries = ENIAMwalConnect.schemata e in 53 let entries = ENIAMwalConnect.schemata e in
48 Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in 54 Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in
  55 + print_endline "load_walenty 7";
49 let expands,compreps,subtypes,equivs,adv_types = 56 let expands,compreps,subtypes,equivs,adv_types =
50 ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in 57 ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in
  58 + print_endline "load_walenty 8";
51 let phrases = 59 let phrases =
52 IntMap.map phrases (fun morf -> 60 IntMap.map phrases (fun morf ->
53 let morf = ENIAMwalRealizations.expand_schema_morf expands morf in 61 let morf = ENIAMwalRealizations.expand_schema_morf expands morf in
54 let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in 62 let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in
55 let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in 63 let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in
56 morf) in 64 morf) in
  65 + print_endline "load_walenty 9";
57 let compreps = Xlist.map compreps (fun (lemma,morfs) -> 66 let compreps = Xlist.map compreps (fun (lemma,morfs) ->
58 lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in 67 lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in
  68 + print_endline "load_walenty 10";
59 let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in 69 let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in
  70 + print_endline "load_walenty 11";
60 let phrases,entries = 71 let phrases,entries =
61 IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs -> 72 IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs ->
62 let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in 73 let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in
63 let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in 74 let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in
64 IntMap.add phrases id morfs, entries) in 75 IntMap.add phrases id morfs, entries) in
  76 + print_endline "load_walenty 12";
65 let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) -> 77 let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) ->
66 Entries.add_inc entries pos lemma entry) in 78 Entries.add_inc entries pos lemma entry) in
  79 + print_endline "load_walenty 13";
67 let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in 80 let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in
  81 + print_endline "load_walenty 14";
68 let entries = Entries.flatten_map entries (fun pos lemma entry -> 82 let entries = Entries.flatten_map entries (fun pos lemma entry ->
69 ENIAMwalLex.expand_restr [] lemma pos entry) in 83 ENIAMwalLex.expand_restr [] lemma pos entry) in
70 (* let entries = 84 (* let entries =
@@ -72,6 +86,7 @@ let load_walenty walenty_filename expands_filename = @@ -72,6 +86,7 @@ let load_walenty walenty_filename expands_filename =
72 StringMap.mapi entries2 (fun lemma entries3 -> 86 StringMap.mapi entries2 (fun lemma entries3 ->
73 EntrySet.fold entries3 [] (fun entries3 entry -> 87 EntrySet.fold entries3 [] (fun entries3 entry ->
74 (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *) 88 (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *)
  89 + print_endline "load_walenty 15";
75 connected_walenty, schemata_walenty, phrases, entries, meanings, adv_types 90 connected_walenty, schemata_walenty, phrases, entries, meanings, adv_types
76 91
77 let print_entries filename entries = 92 let print_entries filename entries =
@@ -97,15 +112,15 @@ let print_schemata filename schemata = @@ -97,15 +112,15 @@ let print_schemata filename schemata =
97 112
98 let print_connected filename connected = 113 let print_connected filename connected =
99 File.file_out filename (fun file -> 114 File.file_out filename (fun file ->
100 - Entries.iter connected (fun pos lemma (sopinion,fopinion,meanings,(n,p,a),schema) -> 115 + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema*) ->
101 Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma 116 Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma
102 - (ENIAMwalStringOf.opinion sopinion)  
103 - (ENIAMwalStringOf.opinion fopinion)  
104 - (String.concat "," (Xlist.map meanings string_of_int))  
105 - (ENIAMwalStringOf.negation n)  
106 - (ENIAMwalStringOf.pred p)  
107 - (ENIAMwalStringOf.aspect a)  
108 - (ENIAMwalStringOf.connected_schema schema))) 117 + (ENIAMwalStringOf.opinion c.sopinion)
  118 + (ENIAMwalStringOf.opinion c.fopinion)
  119 + (String.concat "," (Xlist.map c.meanings (fun m -> string_of_int m.mng_id)))
  120 + (ENIAMwalStringOf.negation c.negativity)
  121 + (ENIAMwalStringOf.pred c.predicativity)
  122 + (ENIAMwalStringOf.aspect c.aspect)
  123 + (ENIAMwalStringOf.connected_schema c.schema)))
109 124
110 let split_tokens s = 125 let split_tokens s =
111 let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function 126 let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function
walenty/ENIAMwalStringOf.ml
@@ -101,6 +101,7 @@ let gf = function @@ -101,6 +101,7 @@ let gf = function
101 SUBJ -> "subj" 101 SUBJ -> "subj"
102 | OBJ -> "obj" 102 | OBJ -> "obj"
103 | ARG -> "arg"(*""*) 103 | ARG -> "arg"(*""*)
  104 + | HEAD -> "head"
104 105
105 let pos = function 106 let pos = function
106 SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")" 107 SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")"
walenty/ENIAMwalTEI.ml
@@ -60,7 +60,8 @@ let rec tei_to_string = function @@ -60,7 +60,8 @@ let rec tei_to_string = function
60 | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) 60 | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
61 | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) 61 | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
62 | Id id -> Printf.sprintf "Id(%s)" (string_of_id id) 62 | Id id -> Printf.sprintf "Id(%s)" (string_of_id id)
63 - | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s 63 + (* | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s *)
  64 + | SameAs(id,s) -> Printf.sprintf "SameAs(%s,%s)" (string_of_id id) s
64 65
65 let rec parse_tei = function 66 let rec parse_tei = function
66 Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) -> 67 Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) ->
@@ -82,6 +83,7 @@ let rec parse_tei = function @@ -82,6 +83,7 @@ let rec parse_tei = function
82 let parse_gf = function 83 let parse_gf = function
83 "subj" -> SUBJ 84 "subj" -> SUBJ
84 | "obj" -> OBJ 85 | "obj" -> OBJ
  86 + | "head" -> HEAD
85 | s -> failwith ("parse_gf: " ^ s) 87 | s -> failwith ("parse_gf: " ^ s)
86 88
87 let parse_control arg = function 89 let parse_control arg = function
@@ -128,6 +130,7 @@ let parse_number = function @@ -128,6 +130,7 @@ let parse_number = function
128 130
129 let parse_gender = function 131 let parse_gender = function
130 "m1" -> Gender "m1" 132 "m1" -> Gender "m1"
  133 + | "m2" -> Gender "m2"
131 | "m3" -> Gender "m3" 134 | "m3" -> Gender "m3"
132 | "n" -> Gender "n"(*Genders["n1";"n2"]*) 135 | "n" -> Gender "n"(*Genders["n1";"n2"]*)
133 | "f" -> Gender "f" 136 | "f" -> Gender "f"
@@ -136,6 +139,21 @@ let parse_gender = function @@ -136,6 +139,21 @@ let parse_gender = function
136 | "agr" -> GenderAgr 139 | "agr" -> GenderAgr
137 | s -> failwith ("parse_gender: " ^ s) 140 | s -> failwith ("parse_gender: " ^ s)
138 141
  142 +let parse_genders = function
  143 + [Symbol "agr"] -> GenderAgr
  144 + | genders ->
  145 + let genders = Xlist.map genders (function
  146 + Symbol "m1" -> "m1"
  147 + | Symbol "m2" -> "m2"
  148 + | Symbol "m3" -> "m3"
  149 + | Symbol "n" -> "n"
  150 + | Symbol "f" -> "f"
  151 + | s -> failwith ("parse_genders: " ^ tei_to_string s)) in
  152 + (match genders with
  153 + [g] -> Gender g
  154 + | [] -> failwith "parse_genders: empty"
  155 + | _ -> Genders genders)
  156 +
139 let parse_grad = function 157 let parse_grad = function
140 "pos" -> Grad "pos" 158 "pos" -> Grad "pos"
141 | "com" -> Grad "com" 159 | "com" -> Grad "com"
@@ -310,7 +328,7 @@ and load_lex arg xml = match xml with @@ -310,7 +328,7 @@ and load_lex arg xml = match xml with
310 | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue} 328 | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue}
311 | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse} 329 | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse}
312 | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty} 330 | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty}
313 - | Fset("gender",[Symbol value]) -> {arg with lex_gender = parse_gender value} 331 + | Fset("gender",genders) -> {arg with lex_gender = parse_genders genders}
314 | xml -> 332 | xml ->
315 Printf.printf "%s\n" (tei_to_string xml); 333 Printf.printf "%s\n" (tei_to_string xml);
316 failwith "load_lex:\n " 334 failwith "load_lex:\n "
@@ -436,6 +454,8 @@ let load_phrases_set ent = function @@ -436,6 +454,8 @@ let load_phrases_set ent = function
436 let load_example_info ent arg = function 454 let load_example_info ent arg = function
437 | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) -> 455 | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) ->
438 if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id) 456 if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)
  457 + | F("meaning",SameAs({hash=true; numbers=[id]; suffix="mng"},"lexical_unit")) ->
  458 + {arg with meaning = id}
439 | Fset("phrases",phrases_set) -> 459 | Fset("phrases",phrases_set) ->
440 {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))} 460 {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))}
441 | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string} 461 | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string}
@@ -456,8 +476,8 @@ let load_example ent = function @@ -456,8 +476,8 @@ let load_example ent = function
456 let load_self_prefs_sets name ent frm = function 476 let load_self_prefs_sets name ent frm = function
457 | Numeric value -> if name = "synsets" then SynsetId value else failwith "load_self_prefs_sets" 477 | Numeric value -> if name = "synsets" then SynsetId value else failwith "load_self_prefs_sets"
458 | Symbol value -> if name = "predefs" then Predef value else failwith "load_self_prefs_sets" 478 | Symbol value -> if name = "predefs" then Predef value else failwith "load_self_prefs_sets"
459 - | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"}, "argument"))]) ->  
460 - if ent_id <> ent || frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d %d" ent ent_id) 479 + | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"}, "argument"))]) ->
  480 + if (*ent_id <> ent ||*) frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d" ent (*ent_id*))
461 else RelationArgId(value,arg_id) 481 else RelationArgId(value,arg_id)
462 | xml -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string xml) 482 | xml -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string xml)
463 483
@@ -472,9 +492,9 @@ let load_argument_info ent frm arg = function @@ -472,9 +492,9 @@ let load_argument_info ent frm arg = function
472 | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) -> 492 | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) ->
473 {arg with sel_prefs = List.flatten (List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent frm)))} 493 {arg with sel_prefs = List.flatten (List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent frm)))}
474 (* | Id id -> {arg with arg_id = id} *) 494 (* | Id id -> {arg with arg_id = id} *)
475 - | Id{hash=false; numbers=[ent_id;frm_id;id]; suffix="arg"} ->  
476 - if ent_id = ent && frm_id = frm then {arg with arg_id = id}  
477 - else failwith (Printf.sprintf "load_argument_info %d %d" ent ent_id) 495 + | Id{hash=false; numbers=[(*ent_id;*)frm_id;id]; suffix="arg"} ->
  496 + if (*ent_id = ent &&*) frm_id = frm then {arg with arg_id = id}
  497 + else failwith (Printf.sprintf "load_argument_info %d" ent (*ent_id*))
478 | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml) 498 | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml)
479 499
480 let load_arguments_set ent frm = function 500 let load_arguments_set ent frm = function
@@ -485,21 +505,26 @@ let load_arguments_set ent frm = function @@ -485,21 +505,26 @@ let load_arguments_set ent frm = function
485 | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml) 505 | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml)
486 506
487 let load_meanings_set ent = function 507 let load_meanings_set ent = function
488 - | SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit") ->  
489 - if ent_id = ent then id else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id) 508 + | SameAs({hash=true; numbers=[(*ent_id;*)id]; suffix="mng"},"lexical_unit") ->
  509 + (*if ent_id = ent then*) id (*else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id)*)
490 | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml) 510 | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml)
491 511
492 let load_frame ent = function 512 let load_frame ent = function
493 | Fs("frame",[ 513 | Fs("frame",[
494 - Id{hash=false; numbers=[ent_id;id]; suffix="frm"}; 514 + Id{hash=false; numbers=[(*ent_id;*)id]; suffix="frm"};
495 F("opinion",Symbol opinion); 515 F("opinion",Symbol opinion);
496 Fset("meanings",meanings_set); 516 Fset("meanings",meanings_set);
497 Fset("arguments",arguments_set)]) -> 517 Fset("arguments",arguments_set)]) ->
498 - if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else 518 + (*if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else*)
  519 + (* Printf.printf "Frame IN %d\n" id; *)
499 {frm_id = id; 520 {frm_id = id;
500 opinion = parse_opinion opinion; 521 opinion = parse_opinion opinion;
501 meanings = List.rev (Xlist.rev_map meanings_set (load_meanings_set ent)); 522 meanings = List.rev (Xlist.rev_map meanings_set (load_meanings_set ent));
502 arguments = List.rev (Xlist.rev_map arguments_set (load_arguments_set ent id))} 523 arguments = List.rev (Xlist.rev_map arguments_set (load_arguments_set ent id))}
  524 + | SameAs({hash=true; numbers=[id]; suffix="frm"},frame) -> (* FIXME !! *)
  525 + (* (try IntMap.find frames id with Not_found -> failwith ("load_frame: ^ " ^ string_of_int id)) *)
  526 + (* Printf.printf "Frame OUT %d\n" id; *)
  527 + {frm_id=(-id); opinion=Nieokreslony; meanings=[]; arguments=[]}
503 | xml -> failwith ("load_frame :\n " ^ tei_to_string xml) 528 | xml -> failwith ("load_frame :\n " ^ tei_to_string xml)
504 529
505 let load_meaning_info ent arg = function 530 let load_meaning_info ent arg = function
@@ -507,7 +532,8 @@ let load_meaning_info ent arg = function @@ -507,7 +532,8 @@ let load_meaning_info ent arg = function
507 | F("variant",TEIstring variant_string) -> {arg with variant = variant_string} 532 | F("variant",TEIstring variant_string) -> {arg with variant = variant_string}
508 | F("plwnluid",Numeric value) -> {arg with plwnluid = value} 533 | F("plwnluid",Numeric value) -> {arg with plwnluid = value}
509 | F("gloss",TEIstring gloss_string) -> {arg with gloss = gloss_string} 534 | F("gloss",TEIstring gloss_string) -> {arg with gloss = gloss_string}
510 - | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id) 535 + (* | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id) *)
  536 + | Id{hash=false; numbers=[id]; suffix="mng"} -> {arg with mng_id = id}
511 | xml -> failwith ("load_meaning_info:\n " ^ tei_to_string xml) 537 | xml -> failwith ("load_meaning_info:\n " ^ tei_to_string xml)
512 538
513 539
@@ -518,9 +544,9 @@ let load_meaning ent = function @@ -518,9 +544,9 @@ let load_meaning ent = function
518 544
519 let load_alter_connection ent = function 545 let load_alter_connection ent = function
520 | Fs("connection", [ 546 | Fs("connection", [
521 - F("argument",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"},"argument")); 547 + F("argument",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"},"argument"));
522 Fset("phrases",phrases)]) -> 548 Fset("phrases",phrases)]) ->
523 - if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else 549 + (* if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else *)
524 let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase -> 550 let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase ->
525 let sch_id,psn_id,phr_id = load_phrases_set ent phrase in 551 let sch_id,psn_id,phr_id = load_phrases_set ent phrase in
526 IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l), 552 IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l),
@@ -528,7 +554,7 @@ let load_alter_connection ent = function @@ -528,7 +554,7 @@ let load_alter_connection ent = function
528 if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else 554 if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else
529 IntSet.min_elt sch_set, frm_id, 555 IntSet.min_elt sch_set, frm_id,
530 {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)} 556 {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)}
531 - | xml -> failwith ("load_alter_connections: \n " ^ tei_to_string xml) 557 + | xml -> failwith ("load_alter_connection: \n " ^ tei_to_string xml)
532 558
533 let load_alternations ent = function 559 let load_alternations ent = function
534 | Fs("alternation",[Fset("connections",connections_set)]) -> 560 | Fs("alternation",[Fset("connections",connections_set)]) ->
@@ -561,6 +587,17 @@ let load_entry phrases = function @@ -561,6 +587,17 @@ let load_entry phrases = function
561 | xml -> failwith ("load_entry: \n" ^ tei_to_string xml))) 587 | xml -> failwith ("load_entry: \n" ^ tei_to_string xml)))
562 | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml) 588 | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
563 589
  590 +let add_known_frames known_frames e =
  591 + Xlist.fold e.frames known_frames (fun known_frames f ->
  592 + if f.frm_id < 0 then known_frames else IntMap.add known_frames f.frm_id f)
  593 +
  594 +let expand_frames known_frames e =
  595 + {e with frames =
  596 + List.rev (Xlist.rev_map e.frames (fun f ->
  597 + if f.frm_id < 0 then
  598 + try IntMap.find known_frames (-f.frm_id) with Not_found -> failwith "expand_frames"
  599 + else f))}
  600 +
564 let load_walenty filename = 601 let load_walenty filename =
565 begin 602 begin
566 match Xml.parse_file filename with 603 match Xml.parse_file filename with
@@ -568,7 +605,9 @@ let load_walenty filename = @@ -568,7 +605,9 @@ let load_walenty filename =
568 [Xml.Element("teiHeader",_,_) ; 605 [Xml.Element("teiHeader",_,_) ;
569 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> 606 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
570 let phrases = ref IntMap.empty in 607 let phrases = ref IntMap.empty in
571 - let walenty = List.rev (Xlist.rev_map entries (load_entry phrases)) in 608 + let walenty = Xlist.rev_map entries (load_entry phrases) in
  609 + let known_frames = Xlist.fold walenty IntMap.empty add_known_frames in
  610 + let walenty = Xlist.rev_map walenty (expand_frames known_frames) in
572 walenty, !phrases 611 walenty, !phrases
573 | _ -> failwith "load_walenty" 612 | _ -> failwith "load_walenty"
574 end 613 end
walenty/ENIAMwalTypes.ml
@@ -33,7 +33,7 @@ type grad = Grad of string | GradUndef @@ -33,7 +33,7 @@ type grad = Grad of string | GradUndef
33 type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef 33 type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef
34 (* type acm = Acm of string | AcmUndef *) 34 (* type acm = Acm of string | AcmUndef *)
35 35
36 -type gf = SUBJ | OBJ | ARG 36 +type gf = SUBJ | OBJ | ARG | HEAD (* FIXME *)
37 37
38 type pos = 38 type pos =
39 SUBST of number * case 39 SUBST of number * case
@@ -219,3 +219,14 @@ type entry = {ent_id: int; @@ -219,3 +219,14 @@ type entry = {ent_id: int;
219 219
220 let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[]; 220 let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[];
221 frames=[]; meanings=[]; alternations=[]} 221 frames=[]; meanings=[]; alternations=[]}
  222 +
  223 +type connected = {sch_id: int;
  224 + frm_id: int;
  225 + sopinion: opinion;
  226 + fopinion: opinion;
  227 + meanings: meaning list;
  228 + negativity: negation;
  229 + predicativity: pred;
  230 + aspect: aspect;
  231 + schema: position list;
  232 + examples: (opinion * string) list}
walenty/README
@@ -32,8 +32,8 @@ make clean @@ -32,8 +32,8 @@ make clean
32 32
33 Credits 33 Credits
34 ------- 34 -------
35 -Copyright © 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
36 -Copyright © 2016-2017 Institute of Computer Science Polish Academy of Sciences 35 +Copyright © 2016-2018 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  36 +Copyright © 2016-2018 Institute of Computer Science Polish Academy of Sciences
37 37
38 Licence 38 Licence
39 ------- 39 -------
walenty/resources/README
@@ -3,7 +3,7 @@ Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty) @@ -3,7 +3,7 @@ Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty)
3 3
4 Walenty is licensed under the following license: 4 Walenty is licensed under the following license:
5 5
6 -(C) Copyright 2012–2017 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN) 6 +(C) Copyright 2012–2018 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)
7 This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/ 7 This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/
8 Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains 90326 schemata and 17920 frames for 16044 lemmata. 8 Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains 90326 schemata and 17920 frames for 16044 lemmata.
9 The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk. 9 The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.