Commit 4afde66aa00c8ecb39f904e7d806e7832f12d68e

Authored by Wojciech Jaworski
1 parent 44e8be87

Dostosowanie do nowego Walentego

LCGlexicon/ENIAMcategoriesPL.ml
... ... @@ -37,7 +37,7 @@ let selector_values = Xlist.fold [
37 37 "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum";
38 38 "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja";
39 39 "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt";
40   - "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj";
  40 + "imps";"pred";"aglt";"inf";"pcon";"pant";"pacta";"qub";"part";"comp";"conj";"interj";
41 41 "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"];
42 42 Pos2, [];
43 43 Cat, [];
... ... @@ -413,6 +413,7 @@ let clarify_categories proper cat coerced (*snode*) = function
413 413 | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
414 414 | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
415 415 | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}]
  416 + | lemma,"pacta",[] -> [{empty_cats with lemma=lemma; pos="pacta"; pos2="verb"; cat=cat; coerced=coerced; snode=snode}]
416 417 | lemma,"qub",[] ->
417 418 if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}]
418 419 else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}]
... ... @@ -662,6 +663,7 @@ let pos_categories = Xlist.fold [
662 663 "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
663 664 "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
664 665 "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;];
  666 + "pacta",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;];
665 667 "qub",[Lemma;Cat;Role;SNode;];
666 668 "part",[Lemma;SNode];
667 669 "comp",[Lemma;SNode;];(* ctype *)
... ...
LCGparser/ENIAM_LCGlatexOf.ml
... ... @@ -213,7 +213,7 @@ let chart page text_fragments g =
213 213 String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes ->
214 214 IntMap.fold nodes l (fun l node1 contents ->
215 215 Xlist.fold contents l (fun l (node2,symbol,sem) ->
216   - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
  216 + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
217 217 (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
218 218 "\\end{longtable}"
219 219  
... ... @@ -221,7 +221,7 @@ let chart2 page text_fragments g =
221 221 let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in
222 222 "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^
223 223 String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) ->
224   - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
  224 + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
225 225 (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^
226 226 "\\end{longtable}"
227 227  
... ...
semantics/ENIAMsemGraph.ml
... ... @@ -570,7 +570,8 @@ let rec reduce_tree = function
570 570 (match reduce_tree c with
571 571 Context c ->
572 572 let t,args = extract_aroles {t with arole=""} c.cx_contents in
573   - make_relation t (Context {c with cx_contents=args})
  573 + (*make_relation t (Context {c with cx_contents=args})*) (* FIXME: to trzeba poprawić tak by działało w obu wersjach parserów *)
  574 + Relation(t.role,"",Context {c with cx_contents=args})
574 575 | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c))))
575 576 | c -> ManageCoordination(t,c))
576 577 | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree))
... ...
subsyntax/ENIAMsubsyntax.ml
... ... @@ -325,7 +325,7 @@ let parse query =
325 325 let paths,_ = ENIAM_MWE.process paths in
326 326 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a12"; *)
327 327 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
328   - let paths = List.rev (Xlist.rev_map paths find_proper_names) in
  328 + let paths = if !recognize_proper_names then List.rev (Xlist.rev_map paths find_proper_names) else paths in
329 329 (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a13"; *)
330 330 (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *)
331 331 let paths = modify_weights paths in
... ...
subsyntax/ENIAMsubsyntaxTypes.ml
... ... @@ -46,6 +46,7 @@ type text =
46 46 | AltText of (mode * text) list
47 47  
48 48 let strong_disambiguate_flag = ref false
  49 +let recognize_proper_names = ref true (* When false, ENIAMsubsyntax.parse skips the find_proper_names pass; toggled by the --proper-names / --no-proper-names options. *)
49 50  
50 51 let data_path =
51 52 try Sys.getenv "ENIAM_USER_DATA_PATH"
... ...
subsyntax/interface.ml
... ... @@ -43,6 +43,8 @@ let spec_list = [
43 43 "--no-internet-mode", Arg.Unit (fun () -> ENIAMtokenizerTypes.internet_mode:=false), "Strict attitude towards interpunction (default)";
44 44 "--par-names", Arg.Unit (fun () -> par_names:=true), "Identifiers of paragraphs provided";
45 45 "--no-par-names", Arg.Unit (fun () -> par_names:=false), "No identifiers of paragraphs provided (default)";
  46 + "--proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=true), "Recognize proper names (default)";
  47 + "--no-proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=false), "Do not recognize proper names";
46 48 ]
47 49  
48 50 let usage_msg =
... ...
walenty/ENIAMwalAnalyze.ml
... ... @@ -93,7 +93,7 @@ let walenty_filename,expands_filename =
93 93 (* "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml",
94 94 "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" *)
95 95  
96   -let _ =
  96 +(*let _ =
97 97 let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
98 98 let walenty = Xlist.rev_map walenty correct_walenty in
99 99 let expands = ENIAMwalTEI.load_expands expands_filename in
... ... @@ -126,7 +126,7 @@ let _ =
126 126 File.file_out "results/controll.tab" (fun file ->
127 127 StringMap.iter cmap (fun s l ->
128 128 Printf.fprintf file "%d\t%s\t%s\n" (Xlist.size l) s (String.concat " " l)));
129   - ()
  129 + ()*)
130 130  
131 131 (* Test unikalności indeksów sensów *)
132 132 (* let _ =
... ... @@ -339,3 +339,110 @@ let has_realization = function
339 339  
340 340 (* let _ = print_entries entries *)
341 341 *)
  342 +(* Set of phrase ids taken from the first field of each two-field row of "results/phrases_cp.tab"; computed once when this module is initialised. A row with any other number of fields raises Failure "selected_phrases". *)
  343 +let selected_phrases =
  344 + File.fold_tab "results/phrases_cp.tab" IntSet.empty (fun set -> function
  345 + [id;_] -> IntSet.add set (int_of_string id)
  346 + | _ -> failwith "selected_phrases")
  347 +(* Writes one line "id<TAB>morf" to [filename] for every binding of the [phrases] map, rendering each morf with ENIAMwalStringOf.morf. *)
  348 +let print_phrases filename phrases =
  349 + File.file_out filename (fun file ->
  350 + IntMap.iter phrases (fun id morf ->
  351 + Printf.fprintf file "%d\t%s\n" id (ENIAMwalStringOf.morf morf)))
  352 +(* Renders a schema as "+"-joined positions. Each position is its comma-joined labels (grammatical function unless it is ARG, then mode, controllers, controllees), followed by "{" morfs joined with ";" "}:" and the semantic frame. NOTE(review): declared [rec] but never calls itself. *)
  353 +let rec connected_schema schema =
  354 + String.concat "+" (Xlist.map schema (fun s ->
  355 + String.concat "," (
  356 + (if s.gf = ARG then [] else [ENIAMwalStringOf.gf s.gf])@
  357 + s.mode@(ENIAMwalStringOf.controllers s.cr)@(ENIAMwalStringOf.controllees s.ce)) ^
  358 + "{" ^ String.concat ";" (Xlist.map s.morfs ENIAMwalStringOf.morf) ^ "}:" ^ ENIAMwalStringOf.sem_frame s))
  359 +(* Writes every connected entry to [filename]: an empty line, then a tab-indented header with schema id, frame id, pos, lemma, both opinions, meanings (mng_id when the name is empty, otherwise name-variant), negation, predicativity, aspect and the rendered schema, then one "#opinion: sentence" line per example. *)
  360 +let print_connected filename connected =
  361 + File.file_out filename (fun file ->
  362 + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema,examples*) ->
  363 + Printf.fprintf file "\n\t%d\t%d\t%s: %s: %s: %s: %s: %s: %s: %s:\t%s\n"
  364 + c.sch_id c.frm_id pos lemma
  365 + (ENIAMwalStringOf.opinion c.sopinion)
  366 + (ENIAMwalStringOf.opinion c.fopinion)
  367 + (String.concat "," (Xlist.map c.meanings (fun m ->
  368 + if m.name="" then string_of_int m.mng_id else m.name ^ "-" ^ m.variant)))
  369 + (ENIAMwalStringOf.negation c.negativity)
  370 + (ENIAMwalStringOf.pred c.predicativity)
  371 + (ENIAMwalStringOf.aspect c.aspect)
  372 + (connected_schema c.schema);
  373 + Xlist.iter c.examples (fun (opinion,exm) ->
  374 + Printf.fprintf file "#%s: %s\n" (ENIAMwalStringOf.opinion opinion) exm)))
  375 +(* Replaces a MorfId by the phrase stored under that id in [phrases]. An id missing from the map is reported on stdout and returned unchanged; any argument that is not a MorfId raises Failure "expand_morf". *)
  376 +let expand_morf phrases = function
  377 + | MorfId id ->
  378 + (try IntMap.find phrases id
  379 + with Not_found -> Printf.printf "expand_morf: %d\n" id; MorfId id)
  380 + | _ -> failwith "expand_morf"
  381 +(* Turns a SynsetId that is present in [meanings] into Predef "name-variant" built from that meaning. A SynsetId that is not found, and every other selectional preference, is returned unchanged. *)
  382 +let expand_sel_prefs meanings = function
  383 + SynsetId id ->
  384 + (try
  385 + let m = IntMap.find meanings id in
  386 + Predef (m.name ^ "-" ^ m.variant)
  387 + with Not_found -> (*Printf.printf "expand_sel_prefs: %d\n" id;*) SynsetId id)
  388 + | s -> s
  389 +(* Returns [c] with a rebuilt schema: in every position the morfs are mapped through expand_morf (using [phrases]) and the sel_prefs through expand_sel_prefs (using [meanings_map]). All other fields of [c] are kept. *)
  390 +let expand_schema phrases meanings_map c =
  391 + let schema = Xlist.map c.schema (fun (s : position) ->
  392 + {s with
  393 + morfs = Xlist.map s.morfs (expand_morf phrases);
  394 + sel_prefs = Xlist.map s.sel_prefs (expand_sel_prefs meanings_map)}) in
  395 + (* let meanings = Xlist.map c.meanings (fun id -> try IntMap.find meanings_map id with Not_found -> {empty_meaning with name=string_of_int id}) in *)
  396 + {c with (*meanings2=meanings;*) schema=schema}
  397 +(* Returns [c] with its examples field set to the (opinion, sentence) pairs of those [examples] whose meaning is the mng_id of one of c.meanings and which contain at least one phrase reference with schema id c.sch_id and a morf id that occurs as a MorfId in c.schema. *)
  398 +let assign_examples examples c =
  399 + let p_set = Xlist.fold c.schema IntSet.empty (fun p_set p ->
  400 + Xlist.fold p.morfs p_set (fun p_set -> function
  401 + MorfId id -> IntSet.add p_set id
  402 + | _ -> p_set)) in
  403 + let m_set = Xlist.fold c.meanings IntSet.empty (fun m_set m -> IntSet.add m_set m.mng_id) in
  404 + let examples = Xlist.fold examples [] (fun examples (e : example) ->
  405 + let b = Xlist.fold e.phrases false (fun b (sch_id,_,morf_id) ->
  406 + if c.sch_id = sch_id && IntSet.mem p_set morf_id then true else b) in
  407 + if IntSet.mem m_set e.meaning && b then e :: examples else examples) in
  408 + let examples = Xlist.rev_map examples (fun e -> e.opinion,e.sentence) in
  409 + {c with examples=examples}
  410 +(* Keeps only the MorfId elements whose id belongs to selected_phrases (the accumulated list is reversed back at the end); any element that is not a MorfId raises Failure "select_morfs". *)
  411 +let select_morfs morfs =
  412 + List.rev (Xlist.fold morfs [] (fun morfs -> function
  413 + MorfId id -> if IntSet.mem selected_phrases id then (MorfId id) :: morfs else morfs
  414 + | _ -> failwith "select_morfs"))
  415 +(* Restricts the morfs of every position with select_morfs and drops the positions that are left with no morfs; the accumulated list is reversed back at the end. *)
  416 +let select_positions schema =
  417 + List.rev (Xlist.fold schema [] (fun schema p ->
  418 + let morfs = select_morfs p.morfs in
  419 + if morfs = [] then schema else
  420 + {p with morfs = morfs} :: schema))
  421 +(* Keeps the entries for which select_positions leaves at least one position; results are consed onto the accumulator and not reversed. NOTE(review): the filtered schema is used only as an emptiness test, so kept entries retain their original, unfiltered schema - confirm this is intended. *)
  422 +let select_entries entries =
  423 + Xlist.fold entries [] (fun entries c ->
  424 + let schema = select_positions c.schema in
  425 + if schema = [] then entries else c :: entries)
  426 +
  427 +(* Print clausal subordinates: loads Walenty, writes all phrases to "results/phrases.tab", then writes to "results/connected.tab" the connected entries kept by select_entries, with examples assigned and schemata expanded. Runs at module initialisation. *)
  428 +let _ =
  429 + let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
  430 + print_phrases "results/phrases.tab" phrases;
  431 + let meanings =
  432 + Xlist.fold walenty IntMap.empty (fun meanings entry ->
  433 + Xlist.fold entry.meanings meanings (fun meanings meaning ->
  434 + IntMap.add meanings meaning.mng_id meaning)) in
  435 + let connected_walenty =
  436 + Xlist.fold walenty Entries.empty (fun connected_walenty e ->
  437 + (* print_endline "1"; *)
  438 + let entries = ENIAMwalConnect.connect e in
  439 + (* print_endline "2"; *)
  440 + let entries = select_entries entries in
  441 + (* print_endline "3"; *)
  442 + let entries = Xlist.rev_map entries (assign_examples e.examples) in
  443 + (* print_endline "4"; *)
  444 + let entries = Xlist.rev_map entries (expand_schema phrases meanings) in
  445 + (* print_endline "5"; *)
  446 + Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in
  447 + print_connected "results/connected.tab" connected_walenty;
  448 + ()
... ...
walenty/ENIAMwalConnect.ml
... ... @@ -30,7 +30,7 @@ let process_positions positions =
30 30 IntMap.add positions position.psn_id position)
31 31  
32 32 let process_schemata schemata =
33   - Xlist.fold schemata IntMap.empty (fun schemata schema ->
  33 + Xlist.fold schemata IntMap.empty (fun schemata (schema : schema) ->
34 34 let atrs = schema.negativity, schema.predicativity, schema.aspect in
35 35 let positions = process_positions schema.positions in
36 36 IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions))
... ... @@ -44,9 +44,9 @@ let process_frames frames =
44 44 let arguments = process_arguments frame.arguments in
45 45 IntMap.add frames frame.frm_id (frame,arguments))
46 46  
47   -(* let process_meanings meanings =
  47 +let process_meanings meanings =
48 48 Xlist.fold meanings IntMap.empty (fun meanings meaning ->
49   - IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*)) *)
  49 + IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*))
50 50  
51 51 let process_sel_pref arguments = function
52 52 SynsetId s -> SynsetId s(*try ENIAMplWordnet.synset_name s with Not_found -> "unknown"*)
... ... @@ -59,9 +59,9 @@ let process_sel_pref arguments = function
59 59 let connect entry =
60 60 let schemata = process_schemata entry.schemata in
61 61 let frames = process_frames entry.frames in
62   - (* let meanings = process_meanings entry.meanings in *)
  62 + let meanings = process_meanings entry.meanings in
63 63 Xlist.fold entry.alternations [] (fun found alt ->
64   - let refl,opinion,schema_atrs,positions = IntMap.find schemata alt.schema in
  64 + let refl,opinion,(n,p,a),positions = IntMap.find schemata alt.schema in
65 65 let frame,arguments = IntMap.find frames alt.frame in
66 66 let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in
67 67 let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn ->
... ... @@ -75,9 +75,10 @@ let connect entry =
75 75 with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in
76 76 {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs;
77 77 morfs=List.rev morfs} :: conn_positions)) in
78   - (* let meanings = List.rev (Xlist.rev_map frame.meanings (fun id ->
79   - IntMap.find meanings id)) in *)
80   - (opinion,frame.opinion,frame.meanings,schema_atrs,conn_positions) :: found)
  78 + let meanings = List.rev (Xlist.rev_map frame.meanings (fun id ->
  79 + try IntMap.find meanings id with Not_found -> {empty_meaning with mng_id=id})) in
  80 + {sch_id=alt.schema; frm_id=alt.frame; sopinion=opinion; fopinion=frame.opinion; meanings=meanings;
  81 + negativity=n; predicativity=p;aspect=a; schema=conn_positions; examples=[]} :: found)
81 82  
82 83 let schemata entry =
83 84 let schemata = process_schemata entry.schemata in
... ...
walenty/ENIAMwalGenerate.ml
... ... @@ -31,40 +31,54 @@ let correct_walenty entry =
31 31 else entry
32 32  
33 33 let load_walenty walenty_filename expands_filename =
  34 + print_endline "load_walenty 1";
34 35 let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
  36 + print_endline "load_walenty 2";
35 37 let walenty = Xlist.rev_map walenty correct_walenty in
  38 + print_endline "load_walenty 3";
36 39 let expands = ENIAMwalTEI.load_expands expands_filename in
  40 + print_endline "load_walenty 4";
37 41 let meanings =
38 42 Xlist.fold walenty IntMap.empty (fun meanings entry ->
39 43 Xlist.fold entry.meanings meanings (fun meanings meaning ->
40 44 IntMap.add meanings meaning.mng_id meaning)) in
  45 + print_endline "load_walenty 5";
41 46 let connected_walenty =
42 47 Xlist.fold walenty Entries.empty (fun connected_walenty e ->
43 48 let entries = ENIAMwalConnect.connect e in
44 49 Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in
  50 + print_endline "load_walenty 6";
45 51 let schemata_walenty =
46 52 Xlist.fold walenty Entries.empty (fun schemata_walenty e ->
47 53 let entries = ENIAMwalConnect.schemata e in
48 54 Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in
  55 + print_endline "load_walenty 7";
49 56 let expands,compreps,subtypes,equivs,adv_types =
50 57 ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in
  58 + print_endline "load_walenty 8";
51 59 let phrases =
52 60 IntMap.map phrases (fun morf ->
53 61 let morf = ENIAMwalRealizations.expand_schema_morf expands morf in
54 62 let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in
55 63 let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in
56 64 morf) in
  65 + print_endline "load_walenty 9";
57 66 let compreps = Xlist.map compreps (fun (lemma,morfs) ->
58 67 lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in
  68 + print_endline "load_walenty 10";
59 69 let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in
  70 + print_endline "load_walenty 11";
60 71 let phrases,entries =
61 72 IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs ->
62 73 let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in
63 74 let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in
64 75 IntMap.add phrases id morfs, entries) in
  76 + print_endline "load_walenty 12";
65 77 let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) ->
66 78 Entries.add_inc entries pos lemma entry) in
  79 + print_endline "load_walenty 13";
67 80 let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in
  81 + print_endline "load_walenty 14";
68 82 let entries = Entries.flatten_map entries (fun pos lemma entry ->
69 83 ENIAMwalLex.expand_restr [] lemma pos entry) in
70 84 (* let entries =
... ... @@ -72,6 +86,7 @@ let load_walenty walenty_filename expands_filename =
72 86 StringMap.mapi entries2 (fun lemma entries3 ->
73 87 EntrySet.fold entries3 [] (fun entries3 entry ->
74 88 (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *)
  89 + print_endline "load_walenty 15";
75 90 connected_walenty, schemata_walenty, phrases, entries, meanings, adv_types
76 91  
77 92 let print_entries filename entries =
... ... @@ -97,15 +112,15 @@ let print_schemata filename schemata =
97 112  
98 113 let print_connected filename connected =
99 114 File.file_out filename (fun file ->
100   - Entries.iter connected (fun pos lemma (sopinion,fopinion,meanings,(n,p,a),schema) ->
  115 + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema*) ->
101 116 Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma
102   - (ENIAMwalStringOf.opinion sopinion)
103   - (ENIAMwalStringOf.opinion fopinion)
104   - (String.concat "," (Xlist.map meanings string_of_int))
105   - (ENIAMwalStringOf.negation n)
106   - (ENIAMwalStringOf.pred p)
107   - (ENIAMwalStringOf.aspect a)
108   - (ENIAMwalStringOf.connected_schema schema)))
  117 + (ENIAMwalStringOf.opinion c.sopinion)
  118 + (ENIAMwalStringOf.opinion c.fopinion)
  119 + (String.concat "," (Xlist.map c.meanings (fun m -> string_of_int m.mng_id)))
  120 + (ENIAMwalStringOf.negation c.negativity)
  121 + (ENIAMwalStringOf.pred c.predicativity)
  122 + (ENIAMwalStringOf.aspect c.aspect)
  123 + (ENIAMwalStringOf.connected_schema c.schema)))
109 124  
110 125 let split_tokens s =
111 126 let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function
... ...
walenty/ENIAMwalStringOf.ml
... ... @@ -101,6 +101,7 @@ let gf = function
101 101 SUBJ -> "subj"
102 102 | OBJ -> "obj"
103 103 | ARG -> "arg"(*""*)
  104 + | HEAD -> "head"
104 105  
105 106 let pos = function
106 107 SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")"
... ...
walenty/ENIAMwalTEI.ml
... ... @@ -60,7 +60,8 @@ let rec tei_to_string = function
60 60 | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
61 61 | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
62 62 | Id id -> Printf.sprintf "Id(%s)" (string_of_id id)
63   - | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s
  63 + (* | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s *)
  64 + | SameAs(id,s) -> Printf.sprintf "SameAs(%s,%s)" (string_of_id id) s
64 65  
65 66 let rec parse_tei = function
66 67 Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) ->
... ... @@ -82,6 +83,7 @@ let rec parse_tei = function
82 83 let parse_gf = function (* Maps a grammatical-function label to its gf constructor; any label other than "subj", "obj" or "head" raises Failure. *)
83 84 "subj" -> SUBJ
84 85 | "obj" -> OBJ
  86 + | "head" -> HEAD
85 87 | s -> failwith ("parse_gf: " ^ s)
86 88  
87 89 let parse_control arg = function
... ... @@ -128,6 +130,7 @@ let parse_number = function
128 130  
129 131 let parse_gender = function
130 132 "m1" -> Gender "m1"
  133 + | "m2" -> Gender "m2"
131 134 | "m3" -> Gender "m3"
132 135 | "n" -> Gender "n"(*Genders["n1";"n2"]*)
133 136 | "f" -> Gender "f"
... ... @@ -136,6 +139,21 @@ let parse_gender = function
136 139 | "agr" -> GenderAgr
137 140 | s -> failwith ("parse_gender: " ^ s)
138 141  
  142 +let parse_genders = function (* Converts a list of gender Symbols: exactly [Symbol "agr"] gives GenderAgr, one gender gives Gender, several give Genders; an empty list or any element other than m1/m2/m3/n/f raises Failure. *)
  143 + [Symbol "agr"] -> GenderAgr
  144 + | genders ->
  145 + let genders = Xlist.map genders (function
  146 + Symbol "m1" -> "m1"
  147 + | Symbol "m2" -> "m2"
  148 + | Symbol "m3" -> "m3"
  149 + | Symbol "n" -> "n"
  150 + | Symbol "f" -> "f"
  151 + | s -> failwith ("parse_genders: " ^ tei_to_string s)) in
  152 + (match genders with
  153 + [g] -> Gender g
  154 + | [] -> failwith "parse_genders: empty"
  155 + | _ -> Genders genders)
  156 +
139 157 let parse_grad = function
140 158 "pos" -> Grad "pos"
141 159 | "com" -> Grad "com"
... ... @@ -310,7 +328,7 @@ and load_lex arg xml = match xml with
310 328 | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue}
311 329 | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse}
312 330 | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty}
313   - | Fset("gender",[Symbol value]) -> {arg with lex_gender = parse_gender value}
  331 + | Fset("gender",genders) -> {arg with lex_gender = parse_genders genders}
314 332 | xml ->
315 333 Printf.printf "%s\n" (tei_to_string xml);
316 334 failwith "load_lex:\n "
... ... @@ -436,6 +454,8 @@ let load_phrases_set ent = function
436 454 let load_example_info ent arg = function
437 455 | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) ->
438 456 if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)
  457 + | F("meaning",SameAs({hash=true; numbers=[id]; suffix="mng"},"lexical_unit")) ->
  458 + {arg with meaning = id}
439 459 | Fset("phrases",phrases_set) ->
440 460 {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))}
441 461 | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string}
... ... @@ -456,8 +476,8 @@ let load_example ent = function
456 476 let load_self_prefs_sets name ent frm = function
457 477 | Numeric value -> if name = "synsets" then SynsetId value else failwith "load_self_prefs_sets"
458 478 | Symbol value -> if name = "predefs" then Predef value else failwith "load_self_prefs_sets"
459   - | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"}, "argument"))]) ->
460   - if ent_id <> ent || frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d %d" ent ent_id)
  479 + | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"}, "argument"))]) ->
  480 + if (*ent_id <> ent ||*) frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d" ent (*ent_id*))
461 481 else RelationArgId(value,arg_id)
462 482 | xml -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string xml)
463 483  
... ... @@ -472,9 +492,9 @@ let load_argument_info ent frm arg = function
472 492 | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) ->
473 493 {arg with sel_prefs = List.flatten (List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent frm)))}
474 494 (* | Id id -> {arg with arg_id = id} *)
475   - | Id{hash=false; numbers=[ent_id;frm_id;id]; suffix="arg"} ->
476   - if ent_id = ent && frm_id = frm then {arg with arg_id = id}
477   - else failwith (Printf.sprintf "load_argument_info %d %d" ent ent_id)
  495 + | Id{hash=false; numbers=[(*ent_id;*)frm_id;id]; suffix="arg"} ->
  496 + if (*ent_id = ent &&*) frm_id = frm then {arg with arg_id = id}
  497 + else failwith (Printf.sprintf "load_argument_info %d" ent (*ent_id*))
478 498 | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml)
479 499  
480 500 let load_arguments_set ent frm = function
... ... @@ -485,21 +505,26 @@ let load_arguments_set ent frm = function
485 505 | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml)
486 506  
487 507 let load_meanings_set ent = function
488   - | SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit") ->
489   - if ent_id = ent then id else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id)
  508 + | SameAs({hash=true; numbers=[(*ent_id;*)id]; suffix="mng"},"lexical_unit") ->
  509 + (*if ent_id = ent then*) id (*else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id)*)
490 510 | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml)
491 511  
492 512 let load_frame ent = function
493 513 | Fs("frame",[
494   - Id{hash=false; numbers=[ent_id;id]; suffix="frm"};
  514 + Id{hash=false; numbers=[(*ent_id;*)id]; suffix="frm"};
495 515 F("opinion",Symbol opinion);
496 516 Fset("meanings",meanings_set);
497 517 Fset("arguments",arguments_set)]) ->
498   - if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else
  518 + (*if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else*)
  519 + (* Printf.printf "Frame IN %d\n" id; *)
499 520 {frm_id = id;
500 521 opinion = parse_opinion opinion;
501 522 meanings = List.rev (Xlist.rev_map meanings_set (load_meanings_set ent));
502 523 arguments = List.rev (Xlist.rev_map arguments_set (load_arguments_set ent id))}
  524 + | SameAs({hash=true; numbers=[id]; suffix="frm"},frame) -> (* FIXME !! *)
  525 + (* (try IntMap.find frames id with Not_found -> failwith ("load_frame: ^ " ^ string_of_int id)) *)
  526 + (* Printf.printf "Frame OUT %d\n" id; *)
  527 + {frm_id=(-id); opinion=Nieokreslony; meanings=[]; arguments=[]}
503 528 | xml -> failwith ("load_frame :\n " ^ tei_to_string xml)
504 529  
505 530 let load_meaning_info ent arg = function
... ... @@ -507,7 +532,8 @@ let load_meaning_info ent arg = function
507 532 | F("variant",TEIstring variant_string) -> {arg with variant = variant_string}
508 533 | F("plwnluid",Numeric value) -> {arg with plwnluid = value}
509 534 | F("gloss",TEIstring gloss_string) -> {arg with gloss = gloss_string}
510   - | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id)
  535 + (* | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id) *)
  536 + | Id{hash=false; numbers=[id]; suffix="mng"} -> {arg with mng_id = id}
511 537 | xml -> failwith ("load_meaning_info:\n " ^ tei_to_string xml)
512 538  
513 539  
... ... @@ -518,9 +544,9 @@ let load_meaning ent = function
518 544  
519 545 let load_alter_connection ent = function
520 546 | Fs("connection", [
521   - F("argument",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"},"argument"));
  547 + F("argument",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"},"argument"));
522 548 Fset("phrases",phrases)]) ->
523   - if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else
  549 + (* if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else *)
524 550 let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase ->
525 551 let sch_id,psn_id,phr_id = load_phrases_set ent phrase in
526 552 IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l),
... ... @@ -528,7 +554,7 @@ let load_alter_connection ent = function
528 554 if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else
529 555 IntSet.min_elt sch_set, frm_id,
530 556 {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)}
531   - | xml -> failwith ("load_alter_connections: \n " ^ tei_to_string xml)
  557 + | xml -> failwith ("load_alter_connection: \n " ^ tei_to_string xml)
532 558  
533 559 let load_alternations ent = function
534 560 | Fs("alternation",[Fset("connections",connections_set)]) ->
... ... @@ -561,6 +587,17 @@ let load_entry phrases = function
561 587 | xml -> failwith ("load_entry: \n" ^ tei_to_string xml)))
562 588 | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
563 589  
  590 +let add_known_frames known_frames e = (* Adds every frame of entry [e] whose frm_id is not negative to the [known_frames] map, keyed by frm_id; frames with a negative frm_id are skipped. *)
  591 + Xlist.fold e.frames known_frames (fun known_frames f ->
  592 + if f.frm_id < 0 then known_frames else IntMap.add known_frames f.frm_id f)
  593 +(* Returns [e] with each frame whose frm_id is negative replaced by the frame stored in [known_frames] under the negated id; raises Failure "expand_frames" when that id is not in the map. Frame order is preserved. *)
  594 +let expand_frames known_frames e =
  595 + {e with frames =
  596 + List.rev (Xlist.rev_map e.frames (fun f ->
  597 + if f.frm_id < 0 then
  598 + try IntMap.find known_frames (-f.frm_id) with Not_found -> failwith "expand_frames"
  599 + else f))}
  600 +
564 601 let load_walenty filename =
565 602 begin
566 603 match Xml.parse_file filename with
... ... @@ -568,7 +605,9 @@ let load_walenty filename =
568 605 [Xml.Element("teiHeader",_,_) ;
569 606 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
570 607 let phrases = ref IntMap.empty in
571   - let walenty = List.rev (Xlist.rev_map entries (load_entry phrases)) in
  608 + let walenty = Xlist.rev_map entries (load_entry phrases) in
  609 + let known_frames = Xlist.fold walenty IntMap.empty add_known_frames in
  610 + let walenty = Xlist.rev_map walenty (expand_frames known_frames) in
572 611 walenty, !phrases
573 612 | _ -> failwith "load_walenty"
574 613 end
... ...
walenty/ENIAMwalTypes.ml
... ... @@ -33,7 +33,7 @@ type grad = Grad of string | GradUndef
33 33 type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef
34 34 (* type acm = Acm of string | AcmUndef *)
35 35  
36   -type gf = SUBJ | OBJ | ARG
  36 +type gf = SUBJ | OBJ | ARG | HEAD (* FIXME *)
37 37  
38 38 type pos =
39 39 SUBST of number * case
... ... @@ -219,3 +219,14 @@ type entry = {ent_id: int;
219 219  
220 220 let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[];
221 221 frames=[]; meanings=[]; alternations=[]}
  222 +(* One schema/frame pairing of an entry, as built by ENIAMwalConnect.connect: the schema and frame ids with their opinions, the meanings of the frame, the negativity/predicativity/aspect of the schema, the connected positions, and (opinion, sentence) examples (created empty by connect). *)
  223 +type connected = {sch_id: int;
  224 + frm_id: int;
  225 + sopinion: opinion;
  226 + fopinion: opinion;
  227 + meanings: meaning list;
  228 + negativity: negation;
  229 + predicativity: pred;
  230 + aspect: aspect;
  231 + schema: position list;
  232 + examples: (opinion * string) list}
... ...
walenty/README
... ... @@ -32,8 +32,8 @@ make clean
32 32  
33 33 Credits
34 34 -------
35   -Copyright © 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
36   -Copyright © 2016-2017 Institute of Computer Science Polish Academy of Sciences
  35 +Copyright © 2016-2018 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  36 +Copyright © 2016-2018 Institute of Computer Science Polish Academy of Sciences
37 37  
38 38 Licence
39 39 -------
... ...
walenty/resources/README
... ... @@ -3,7 +3,7 @@ Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty)
3 3  
4 4 Walenty is licensed under the following license:
5 5  
6   -(C) Copyright 2012–2017 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)
  6 +(C) Copyright 2012–2018 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)
7 7 This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/
8 8 Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains 90326 schemata and 17920 frames for 16044 lemmata.
9 9 The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.
... ...