Commit 4afde66aa00c8ecb39f904e7d806e7832f12d68e
1 parent
44e8be87
Dostosowanie do nowego Walentego
Showing
14 changed files
with
223 additions
and
43 deletions
LCGlexicon/ENIAMcategoriesPL.ml
... | ... | @@ -37,7 +37,7 @@ let selector_values = Xlist.fold [ |
37 | 37 | "day-month-interval";"month-interval";"roman";"roman-interval";"roman-ordnum"; |
38 | 38 | "match-result";"url";"email";"phone-number";"postal-code";"obj-id";"building-number";"list-item";"adj";"adjc";"adjp";"adja"; |
39 | 39 | "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; |
40 | - "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; | |
40 | + "imps";"pred";"aglt";"inf";"pcon";"pant";"pacta";"qub";"part";"comp";"conj";"interj"; | |
41 | 41 | "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; |
42 | 42 | Pos2, []; |
43 | 43 | Cat, []; |
... | ... | @@ -413,6 +413,7 @@ let clarify_categories proper cat coerced (*snode*) = function |
413 | 413 | | lemma,"inf",[aspects] -> [{empty_cats with lemma=lemma; pos="inf"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] |
414 | 414 | | lemma,"pcon",[aspects] -> [{empty_cats with lemma=lemma; pos="pcon"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] |
415 | 415 | | lemma,"pant",[aspects] -> [{empty_cats with lemma=lemma; pos="pant"; pos2="verb"; cat=cat; coerced=coerced; snode=snode; aspects=aspects; negations=["aff"; "neg"]}] |
416 | + | lemma,"pacta",[] -> [{empty_cats with lemma=lemma; pos="pacta"; pos2="verb"; cat=cat; coerced=coerced; snode=snode}] | |
416 | 417 | | lemma,"qub",[] -> |
417 | 418 | if StringSet.mem part_set lemma then [{empty_cats with lemma=lemma; pos="part"; pos2="qub"; snode=snode}] |
418 | 419 | else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"; cat=cat; snode=snode}] |
... | ... | @@ -662,6 +663,7 @@ let pos_categories = Xlist.fold [ |
662 | 663 | "inf",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; |
663 | 664 | "pcon",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; |
664 | 665 | "pant",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;Aspect;Negation;]; |
666 | + "pacta",[Lemma;(*NewLemma;*)Cat;Coerced;Role;SNode;]; | |
665 | 667 | "qub",[Lemma;Cat;Role;SNode;]; |
666 | 668 | "part",[Lemma;SNode]; |
667 | 669 | "comp",[Lemma;SNode;];(* ctype *) |
... | ... |
LCGparser/ENIAM_LCGlatexOf.ml
... | ... | @@ -213,7 +213,7 @@ let chart page text_fragments g = |
213 | 213 | String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes -> |
214 | 214 | IntMap.fold nodes l (fun l node1 contents -> |
215 | 215 | Xlist.fold contents l (fun l (node2,symbol,sem) -> |
216 | - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
216 | + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
217 | 217 | (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^ |
218 | 218 | "\\end{longtable}" |
219 | 219 | |
... | ... | @@ -221,7 +221,7 @@ let chart2 page text_fragments g = |
221 | 221 | let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in |
222 | 222 | "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^ |
223 | 223 | String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> |
224 | - let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
224 | + let s = try Xlatex.escape_string (IntMap.find text_fragments.(node1) node2) with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | |
225 | 225 | (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ |
226 | 226 | "\\end{longtable}" |
227 | 227 | |
... | ... |
semantics/ENIAMsemGraph.ml
... | ... | @@ -570,7 +570,8 @@ let rec reduce_tree = function |
570 | 570 | (match reduce_tree c with |
571 | 571 | Context c -> |
572 | 572 | let t,args = extract_aroles {t with arole=""} c.cx_contents in |
573 | - make_relation t (Context {c with cx_contents=args}) | |
573 | + (*make_relation t (Context {c with cx_contents=args})*) (* FIXME: to trzeba poprawić tak by działało w obu wersjach parserów *) | |
574 | + Relation(t.role,"",Context {c with cx_contents=args}) | |
574 | 575 | | Variant(e,l) -> reduce_tree (Variant(e,Xlist.map l (fun (i,c) -> i,ManageCoordination(t,c)))) |
575 | 576 | | c -> ManageCoordination(t,c)) |
576 | 577 | | Tuple l -> Tuple(List.rev (Xlist.rev_map l reduce_tree)) |
... | ... |
subsyntax/ENIAMsubsyntax.ml
... | ... | @@ -325,7 +325,7 @@ let parse query = |
325 | 325 | let paths,_ = ENIAM_MWE.process paths in |
326 | 326 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a12"; *) |
327 | 327 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) |
328 | - let paths = List.rev (Xlist.rev_map paths find_proper_names) in | |
328 | + let paths = if !recognize_proper_names then List.rev (Xlist.rev_map paths find_proper_names) else paths in | |
329 | 329 | (* print_endline "XXXXXXXXXXXXXXXXXXXXXXXXX a13"; *) |
330 | 330 | (* print_endline (ENIAMsubsyntaxStringOf.token_list paths); *) |
331 | 331 | let paths = modify_weights paths in |
... | ... |
subsyntax/ENIAMsubsyntaxTypes.ml
subsyntax/interface.ml
... | ... | @@ -43,6 +43,8 @@ let spec_list = [ |
43 | 43 | "--no-internet-mode", Arg.Unit (fun () -> ENIAMtokenizerTypes.internet_mode:=false), "Strict attitude towards interpunction (default)"; |
44 | 44 | "--par-names", Arg.Unit (fun () -> par_names:=true), "Identifiers of paragraphs provided"; |
45 | 45 | "--no-par-names", Arg.Unit (fun () -> par_names:=false), "No identifiers of paragraphs provided (default)"; |
46 | + "--proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=true), "Recognize proper names (default)"; | |
47 | + "--no-proper-names", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.recognize_proper_names:=false), "Do not recognize proper names"; | |
46 | 48 | ] |
47 | 49 | |
48 | 50 | let usage_msg = |
... | ... |
walenty/ENIAMwalAnalyze.ml
... | ... | @@ -93,7 +93,7 @@ let walenty_filename,expands_filename = |
93 | 93 | (* "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml", |
94 | 94 | "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" *) |
95 | 95 | |
96 | -let _ = | |
96 | +(*let _ = | |
97 | 97 | let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in |
98 | 98 | let walenty = Xlist.rev_map walenty correct_walenty in |
99 | 99 | let expands = ENIAMwalTEI.load_expands expands_filename in |
... | ... | @@ -126,7 +126,7 @@ let _ = |
126 | 126 | File.file_out "results/controll.tab" (fun file -> |
127 | 127 | StringMap.iter cmap (fun s l -> |
128 | 128 | Printf.fprintf file "%d\t%s\t%s\n" (Xlist.size l) s (String.concat " " l))); |
129 | - () | |
129 | + ()*) | |
130 | 130 | |
131 | 131 | (* Test unikalności indeksów sensów *) |
132 | 132 | (* let _ = |
... | ... | @@ -339,3 +339,110 @@ let has_realization = function |
339 | 339 | |
340 | 340 | (* let _ = print_entries entries *) |
341 | 341 | *) |
342 | + | |
343 | +let selected_phrases = | |
344 | + File.fold_tab "results/phrases_cp.tab" IntSet.empty (fun set -> function | |
345 | + [id;_] -> IntSet.add set (int_of_string id) | |
346 | + | _ -> failwith "selected_phrases") | |
347 | + | |
348 | +let print_phrases filename phrases = | |
349 | + File.file_out filename (fun file -> | |
350 | + IntMap.iter phrases (fun id morf -> | |
351 | + Printf.fprintf file "%d\t%s\n" id (ENIAMwalStringOf.morf morf))) | |
352 | + | |
353 | +let rec connected_schema schema = | |
354 | + String.concat "+" (Xlist.map schema (fun s -> | |
355 | + String.concat "," ( | |
356 | + (if s.gf = ARG then [] else [ENIAMwalStringOf.gf s.gf])@ | |
357 | + s.mode@(ENIAMwalStringOf.controllers s.cr)@(ENIAMwalStringOf.controllees s.ce)) ^ | |
358 | + "{" ^ String.concat ";" (Xlist.map s.morfs ENIAMwalStringOf.morf) ^ "}:" ^ ENIAMwalStringOf.sem_frame s)) | |
359 | + | |
360 | +let print_connected filename connected = | |
361 | + File.file_out filename (fun file -> | |
362 | + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema,examples*) -> | |
363 | + Printf.fprintf file "\n\t%d\t%d\t%s: %s: %s: %s: %s: %s: %s: %s:\t%s\n" | |
364 | + c.sch_id c.frm_id pos lemma | |
365 | + (ENIAMwalStringOf.opinion c.sopinion) | |
366 | + (ENIAMwalStringOf.opinion c.fopinion) | |
367 | + (String.concat "," (Xlist.map c.meanings (fun m -> | |
368 | + if m.name="" then string_of_int m.mng_id else m.name ^ "-" ^ m.variant))) | |
369 | + (ENIAMwalStringOf.negation c.negativity) | |
370 | + (ENIAMwalStringOf.pred c.predicativity) | |
371 | + (ENIAMwalStringOf.aspect c.aspect) | |
372 | + (connected_schema c.schema); | |
373 | + Xlist.iter c.examples (fun (opinion,exm) -> | |
374 | + Printf.fprintf file "#%s: %s\n" (ENIAMwalStringOf.opinion opinion) exm))) | |
375 | + | |
376 | +let expand_morf phrases = function | |
377 | + | MorfId id -> | |
378 | + (try IntMap.find phrases id | |
379 | + with Not_found -> Printf.printf "expand_morf: %d\n" id; MorfId id) | |
380 | + | _ -> failwith "expand_morf" | |
381 | + | |
382 | +let expand_sel_prefs meanings = function | |
383 | + SynsetId id -> | |
384 | + (try | |
385 | + let m = IntMap.find meanings id in | |
386 | + Predef (m.name ^ "-" ^ m.variant) | |
387 | + with Not_found -> (*Printf.printf "expand_sel_prefs: %d\n" id;*) SynsetId id) | |
388 | + | s -> s | |
389 | + | |
390 | +let expand_schema phrases meanings_map c = | |
391 | + let schema = Xlist.map c.schema (fun (s : position) -> | |
392 | + {s with | |
393 | + morfs = Xlist.map s.morfs (expand_morf phrases); | |
394 | + sel_prefs = Xlist.map s.sel_prefs (expand_sel_prefs meanings_map)}) in | |
395 | + (* let meanings = Xlist.map c.meanings (fun id -> try IntMap.find meanings_map id with Not_found -> {empty_meaning with name=string_of_int id}) in *) | |
396 | + {c with (*meanings2=meanings;*) schema=schema} | |
397 | + | |
398 | +let assign_examples examples c = | |
399 | + let p_set = Xlist.fold c.schema IntSet.empty (fun p_set p -> | |
400 | + Xlist.fold p.morfs p_set (fun p_set -> function | |
401 | + MorfId id -> IntSet.add p_set id | |
402 | + | _ -> p_set)) in | |
403 | + let m_set = Xlist.fold c.meanings IntSet.empty (fun m_set m -> IntSet.add m_set m.mng_id) in | |
404 | + let examples = Xlist.fold examples [] (fun examples (e : example) -> | |
405 | + let b = Xlist.fold e.phrases false (fun b (sch_id,_,morf_id) -> | |
406 | + if c.sch_id = sch_id && IntSet.mem p_set morf_id then true else b) in | |
407 | + if IntSet.mem m_set e.meaning && b then e :: examples else examples) in | |
408 | + let examples = Xlist.rev_map examples (fun e -> e.opinion,e.sentence) in | |
409 | + {c with examples=examples} | |
410 | + | |
411 | +let select_morfs morfs = | |
412 | + List.rev (Xlist.fold morfs [] (fun morfs -> function | |
413 | + MorfId id -> if IntSet.mem selected_phrases id then (MorfId id) :: morfs else morfs | |
414 | + | _ -> failwith "select_morfs")) | |
415 | + | |
416 | +let select_positions schema = | |
417 | + List.rev (Xlist.fold schema [] (fun schema p -> | |
418 | + let morfs = select_morfs p.morfs in | |
419 | + if morfs = [] then schema else | |
420 | + {p with morfs = morfs} :: schema)) | |
421 | + | |
422 | +let select_entries entries = | |
423 | + Xlist.fold entries [] (fun entries c -> | |
424 | + let schema = select_positions c.schema in | |
425 | + if schema = [] then entries else c :: entries) | |
426 | + | |
427 | +(* Wypisanie podrzędników zdaniowych *) | |
428 | +let _ = | |
429 | + let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in | |
430 | + print_phrases "results/phrases.tab" phrases; | |
431 | + let meanings = | |
432 | + Xlist.fold walenty IntMap.empty (fun meanings entry -> | |
433 | + Xlist.fold entry.meanings meanings (fun meanings meaning -> | |
434 | + IntMap.add meanings meaning.mng_id meaning)) in | |
435 | + let connected_walenty = | |
436 | + Xlist.fold walenty Entries.empty (fun connected_walenty e -> | |
437 | + (* print_endline "1"; *) | |
438 | + let entries = ENIAMwalConnect.connect e in | |
439 | + (* print_endline "2"; *) | |
440 | + let entries = select_entries entries in | |
441 | + (* print_endline "3"; *) | |
442 | + let entries = Xlist.rev_map entries (assign_examples e.examples) in | |
443 | + (* print_endline "4"; *) | |
444 | + let entries = Xlist.rev_map entries (expand_schema phrases meanings) in | |
445 | + (* print_endline "5"; *) | |
446 | + Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in | |
447 | + print_connected "results/connected.tab" connected_walenty; | |
448 | + () | |
... | ... |
walenty/ENIAMwalConnect.ml
... | ... | @@ -30,7 +30,7 @@ let process_positions positions = |
30 | 30 | IntMap.add positions position.psn_id position) |
31 | 31 | |
32 | 32 | let process_schemata schemata = |
33 | - Xlist.fold schemata IntMap.empty (fun schemata schema -> | |
33 | + Xlist.fold schemata IntMap.empty (fun schemata (schema : schema) -> | |
34 | 34 | let atrs = schema.negativity, schema.predicativity, schema.aspect in |
35 | 35 | let positions = process_positions schema.positions in |
36 | 36 | IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions)) |
... | ... | @@ -44,9 +44,9 @@ let process_frames frames = |
44 | 44 | let arguments = process_arguments frame.arguments in |
45 | 45 | IntMap.add frames frame.frm_id (frame,arguments)) |
46 | 46 | |
47 | -(* let process_meanings meanings = | |
47 | +let process_meanings meanings = | |
48 | 48 | Xlist.fold meanings IntMap.empty (fun meanings meaning -> |
49 | - IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*)) *) | |
49 | + IntMap.add meanings meaning.mng_id meaning(*meaning.name ^ " " ^ meaning.variant*)) | |
50 | 50 | |
51 | 51 | let process_sel_pref arguments = function |
52 | 52 | SynsetId s -> SynsetId s(*try ENIAMplWordnet.synset_name s with Not_found -> "unknown"*) |
... | ... | @@ -59,9 +59,9 @@ let process_sel_pref arguments = function |
59 | 59 | let connect entry = |
60 | 60 | let schemata = process_schemata entry.schemata in |
61 | 61 | let frames = process_frames entry.frames in |
62 | - (* let meanings = process_meanings entry.meanings in *) | |
62 | + let meanings = process_meanings entry.meanings in | |
63 | 63 | Xlist.fold entry.alternations [] (fun found alt -> |
64 | - let refl,opinion,schema_atrs,positions = IntMap.find schemata alt.schema in | |
64 | + let refl,opinion,(n,p,a),positions = IntMap.find schemata alt.schema in | |
65 | 65 | let frame,arguments = IntMap.find frames alt.frame in |
66 | 66 | let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in |
67 | 67 | let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn -> |
... | ... | @@ -75,9 +75,10 @@ let connect entry = |
75 | 75 | with Not_found -> if entry.form_orth <> "podobać" then Printf.printf "connect: %s\n%!" entry.form_orth;morfs) in |
76 | 76 | {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs; |
77 | 77 | morfs=List.rev morfs} :: conn_positions)) in |
78 | - (* let meanings = List.rev (Xlist.rev_map frame.meanings (fun id -> | |
79 | - IntMap.find meanings id)) in *) | |
80 | - (opinion,frame.opinion,frame.meanings,schema_atrs,conn_positions) :: found) | |
78 | + let meanings = List.rev (Xlist.rev_map frame.meanings (fun id -> | |
79 | + try IntMap.find meanings id with Not_found -> {empty_meaning with mng_id=id})) in | |
80 | + {sch_id=alt.schema; frm_id=alt.frame; sopinion=opinion; fopinion=frame.opinion; meanings=meanings; | |
81 | + negativity=n; predicativity=p;aspect=a; schema=conn_positions; examples=[]} :: found) | |
81 | 82 | |
82 | 83 | let schemata entry = |
83 | 84 | let schemata = process_schemata entry.schemata in |
... | ... |
walenty/ENIAMwalGenerate.ml
... | ... | @@ -31,40 +31,54 @@ let correct_walenty entry = |
31 | 31 | else entry |
32 | 32 | |
33 | 33 | let load_walenty walenty_filename expands_filename = |
34 | + print_endline "load_walenty 1"; | |
34 | 35 | let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in |
36 | + print_endline "load_walenty 2"; | |
35 | 37 | let walenty = Xlist.rev_map walenty correct_walenty in |
38 | + print_endline "load_walenty 3"; | |
36 | 39 | let expands = ENIAMwalTEI.load_expands expands_filename in |
40 | + print_endline "load_walenty 4"; | |
37 | 41 | let meanings = |
38 | 42 | Xlist.fold walenty IntMap.empty (fun meanings entry -> |
39 | 43 | Xlist.fold entry.meanings meanings (fun meanings meaning -> |
40 | 44 | IntMap.add meanings meaning.mng_id meaning)) in |
45 | + print_endline "load_walenty 5"; | |
41 | 46 | let connected_walenty = |
42 | 47 | Xlist.fold walenty Entries.empty (fun connected_walenty e -> |
43 | 48 | let entries = ENIAMwalConnect.connect e in |
44 | 49 | Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in |
50 | + print_endline "load_walenty 6"; | |
45 | 51 | let schemata_walenty = |
46 | 52 | Xlist.fold walenty Entries.empty (fun schemata_walenty e -> |
47 | 53 | let entries = ENIAMwalConnect.schemata e in |
48 | 54 | Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in |
55 | + print_endline "load_walenty 7"; | |
49 | 56 | let expands,compreps,subtypes,equivs,adv_types = |
50 | 57 | ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in |
58 | + print_endline "load_walenty 8"; | |
51 | 59 | let phrases = |
52 | 60 | IntMap.map phrases (fun morf -> |
53 | 61 | let morf = ENIAMwalRealizations.expand_schema_morf expands morf in |
54 | 62 | let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in |
55 | 63 | let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in |
56 | 64 | morf) in |
65 | + print_endline "load_walenty 9"; | |
57 | 66 | let compreps = Xlist.map compreps (fun (lemma,morfs) -> |
58 | 67 | lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in |
68 | + print_endline "load_walenty 10"; | |
59 | 69 | let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in |
70 | + print_endline "load_walenty 11"; | |
60 | 71 | let phrases,entries = |
61 | 72 | IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs -> |
62 | 73 | let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in |
63 | 74 | let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in |
64 | 75 | IntMap.add phrases id morfs, entries) in |
76 | + print_endline "load_walenty 12"; | |
65 | 77 | let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) -> |
66 | 78 | Entries.add_inc entries pos lemma entry) in |
79 | + print_endline "load_walenty 13"; | |
67 | 80 | let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in |
81 | + print_endline "load_walenty 14"; | |
68 | 82 | let entries = Entries.flatten_map entries (fun pos lemma entry -> |
69 | 83 | ENIAMwalLex.expand_restr [] lemma pos entry) in |
70 | 84 | (* let entries = |
... | ... | @@ -72,6 +86,7 @@ let load_walenty walenty_filename expands_filename = |
72 | 86 | StringMap.mapi entries2 (fun lemma entries3 -> |
73 | 87 | EntrySet.fold entries3 [] (fun entries3 entry -> |
74 | 88 | (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *) |
89 | + print_endline "load_walenty 15"; | |
75 | 90 | connected_walenty, schemata_walenty, phrases, entries, meanings, adv_types |
76 | 91 | |
77 | 92 | let print_entries filename entries = |
... | ... | @@ -97,15 +112,15 @@ let print_schemata filename schemata = |
97 | 112 | |
98 | 113 | let print_connected filename connected = |
99 | 114 | File.file_out filename (fun file -> |
100 | - Entries.iter connected (fun pos lemma (sopinion,fopinion,meanings,(n,p,a),schema) -> | |
115 | + Entries.iter connected (fun pos lemma c(*sopinion,fopinion,meanings,(n,p,a),schema*) -> | |
101 | 116 | Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma |
102 | - (ENIAMwalStringOf.opinion sopinion) | |
103 | - (ENIAMwalStringOf.opinion fopinion) | |
104 | - (String.concat "," (Xlist.map meanings string_of_int)) | |
105 | - (ENIAMwalStringOf.negation n) | |
106 | - (ENIAMwalStringOf.pred p) | |
107 | - (ENIAMwalStringOf.aspect a) | |
108 | - (ENIAMwalStringOf.connected_schema schema))) | |
117 | + (ENIAMwalStringOf.opinion c.sopinion) | |
118 | + (ENIAMwalStringOf.opinion c.fopinion) | |
119 | + (String.concat "," (Xlist.map c.meanings (fun m -> string_of_int m.mng_id))) | |
120 | + (ENIAMwalStringOf.negation c.negativity) | |
121 | + (ENIAMwalStringOf.pred c.predicativity) | |
122 | + (ENIAMwalStringOf.aspect c.aspect) | |
123 | + (ENIAMwalStringOf.connected_schema c.schema))) | |
109 | 124 | |
110 | 125 | let split_tokens s = |
111 | 126 | let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function |
... | ... |
walenty/ENIAMwalStringOf.ml
walenty/ENIAMwalTEI.ml
... | ... | @@ -60,7 +60,8 @@ let rec tei_to_string = function |
60 | 60 | | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) |
61 | 61 | | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string)) |
62 | 62 | | Id id -> Printf.sprintf "Id(%s)" (string_of_id id) |
63 | - | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s | |
63 | + (* | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s *) | |
64 | + | SameAs(id,s) -> Printf.sprintf "SameAs(%s,%s)" (string_of_id id) s | |
64 | 65 | |
65 | 66 | let rec parse_tei = function |
66 | 67 | Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) -> |
... | ... | @@ -82,6 +83,7 @@ let rec parse_tei = function |
82 | 83 | let parse_gf = function |
83 | 84 | "subj" -> SUBJ |
84 | 85 | | "obj" -> OBJ |
86 | + | "head" -> HEAD | |
85 | 87 | | s -> failwith ("parse_gf: " ^ s) |
86 | 88 | |
87 | 89 | let parse_control arg = function |
... | ... | @@ -128,6 +130,7 @@ let parse_number = function |
128 | 130 | |
129 | 131 | let parse_gender = function |
130 | 132 | "m1" -> Gender "m1" |
133 | + | "m2" -> Gender "m2" | |
131 | 134 | | "m3" -> Gender "m3" |
132 | 135 | | "n" -> Gender "n"(*Genders["n1";"n2"]*) |
133 | 136 | | "f" -> Gender "f" |
... | ... | @@ -136,6 +139,21 @@ let parse_gender = function |
136 | 139 | | "agr" -> GenderAgr |
137 | 140 | | s -> failwith ("parse_gender: " ^ s) |
138 | 141 | |
142 | +let parse_genders = function | |
143 | + [Symbol "agr"] -> GenderAgr | |
144 | + | genders -> | |
145 | + let genders = Xlist.map genders (function | |
146 | + Symbol "m1" -> "m1" | |
147 | + | Symbol "m2" -> "m2" | |
148 | + | Symbol "m3" -> "m3" | |
149 | + | Symbol "n" -> "n" | |
150 | + | Symbol "f" -> "f" | |
151 | + | s -> failwith ("parse_genders: " ^ tei_to_string s)) in | |
152 | + (match genders with | |
153 | + [g] -> Gender g | |
154 | + | [] -> failwith "parse_genders: empty" | |
155 | + | _ -> Genders genders) | |
156 | + | |
139 | 157 | let parse_grad = function |
140 | 158 | "pos" -> Grad "pos" |
141 | 159 | | "com" -> Grad "com" |
... | ... | @@ -310,7 +328,7 @@ and load_lex arg xml = match xml with |
310 | 328 | | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue} |
311 | 329 | | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse} |
312 | 330 | | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty} |
313 | - | Fset("gender",[Symbol value]) -> {arg with lex_gender = parse_gender value} | |
331 | + | Fset("gender",genders) -> {arg with lex_gender = parse_genders genders} | |
314 | 332 | | xml -> |
315 | 333 | Printf.printf "%s\n" (tei_to_string xml); |
316 | 334 | failwith "load_lex:\n " |
... | ... | @@ -436,6 +454,8 @@ let load_phrases_set ent = function |
436 | 454 | let load_example_info ent arg = function |
437 | 455 | | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) -> |
438 | 456 | if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id) |
457 | + | F("meaning",SameAs({hash=true; numbers=[id]; suffix="mng"},"lexical_unit")) -> | |
458 | + {arg with meaning = id} | |
439 | 459 | | Fset("phrases",phrases_set) -> |
440 | 460 | {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))} |
441 | 461 | | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string} |
... | ... | @@ -456,8 +476,8 @@ let load_example ent = function |
456 | 476 | let load_self_prefs_sets name ent frm = function |
457 | 477 | | Numeric value -> if name = "synsets" then SynsetId value else failwith "load_self_prefs_sets" |
458 | 478 | | Symbol value -> if name = "predefs" then Predef value else failwith "load_self_prefs_sets" |
459 | - | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"}, "argument"))]) -> | |
460 | - if ent_id <> ent || frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d %d" ent ent_id) | |
479 | + | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"}, "argument"))]) -> | |
480 | + if (*ent_id <> ent ||*) frm_id <> frm || name <> "relations" then failwith (Printf.sprintf "load_self_prefs_sets %d" ent (*ent_id*)) | |
461 | 481 | else RelationArgId(value,arg_id) |
462 | 482 | | xml -> failwith ("load_self_prefs_sets: \n " ^ tei_to_string xml) |
463 | 483 | |
... | ... | @@ -472,9 +492,9 @@ let load_argument_info ent frm arg = function |
472 | 492 | | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) -> |
473 | 493 | {arg with sel_prefs = List.flatten (List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent frm)))} |
474 | 494 | (* | Id id -> {arg with arg_id = id} *) |
475 | - | Id{hash=false; numbers=[ent_id;frm_id;id]; suffix="arg"} -> | |
476 | - if ent_id = ent && frm_id = frm then {arg with arg_id = id} | |
477 | - else failwith (Printf.sprintf "load_argument_info %d %d" ent ent_id) | |
495 | + | Id{hash=false; numbers=[(*ent_id;*)frm_id;id]; suffix="arg"} -> | |
496 | + if (*ent_id = ent &&*) frm_id = frm then {arg with arg_id = id} | |
497 | + else failwith (Printf.sprintf "load_argument_info %d" ent (*ent_id*)) | |
478 | 498 | | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml) |
479 | 499 | |
480 | 500 | let load_arguments_set ent frm = function |
... | ... | @@ -485,21 +505,26 @@ let load_arguments_set ent frm = function |
485 | 505 | | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml) |
486 | 506 | |
487 | 507 | let load_meanings_set ent = function |
488 | - | SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit") -> | |
489 | - if ent_id = ent then id else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id) | |
508 | + | SameAs({hash=true; numbers=[(*ent_id;*)id]; suffix="mng"},"lexical_unit") -> | |
509 | + (*if ent_id = ent then*) id (*else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id)*) | |
490 | 510 | | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml) |
491 | 511 | |
492 | 512 | let load_frame ent = function |
493 | 513 | | Fs("frame",[ |
494 | - Id{hash=false; numbers=[ent_id;id]; suffix="frm"}; | |
514 | + Id{hash=false; numbers=[(*ent_id;*)id]; suffix="frm"}; | |
495 | 515 | F("opinion",Symbol opinion); |
496 | 516 | Fset("meanings",meanings_set); |
497 | 517 | Fset("arguments",arguments_set)]) -> |
498 | - if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else | |
518 | + (*if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else*) | |
519 | + (* Printf.printf "Frame IN %d\n" id; *) | |
499 | 520 | {frm_id = id; |
500 | 521 | opinion = parse_opinion opinion; |
501 | 522 | meanings = List.rev (Xlist.rev_map meanings_set (load_meanings_set ent)); |
502 | 523 | arguments = List.rev (Xlist.rev_map arguments_set (load_arguments_set ent id))} |
524 | + | SameAs({hash=true; numbers=[id]; suffix="frm"},frame) -> (* FIXME !! *) | |
525 | + (* (try IntMap.find frames id with Not_found -> failwith ("load_frame: ^ " ^ string_of_int id)) *) | |
526 | + (* Printf.printf "Frame OUT %d\n" id; *) | |
527 | + {frm_id=(-id); opinion=Nieokreslony; meanings=[]; arguments=[]} | |
503 | 528 | | xml -> failwith ("load_frame :\n " ^ tei_to_string xml) |
504 | 529 | |
505 | 530 | let load_meaning_info ent arg = function |
... | ... | @@ -507,7 +532,8 @@ let load_meaning_info ent arg = function |
507 | 532 | | F("variant",TEIstring variant_string) -> {arg with variant = variant_string} |
508 | 533 | | F("plwnluid",Numeric value) -> {arg with plwnluid = value} |
509 | 534 | | F("gloss",TEIstring gloss_string) -> {arg with gloss = gloss_string} |
510 | - | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id) | |
535 | + (* | Id{hash=false; numbers=[ent_id;id]; suffix="mng"} -> if ent_id = ent then {arg with mng_id = id} else failwith (Printf.sprintf "load_meaning_info %d %d" ent ent_id) *) | |
536 | + | Id{hash=false; numbers=[id]; suffix="mng"} -> {arg with mng_id = id} | |
511 | 537 | | xml -> failwith ("load_meaning_info:\n " ^ tei_to_string xml) |
512 | 538 | |
513 | 539 | |
... | ... | @@ -518,9 +544,9 @@ let load_meaning ent = function |
518 | 544 | |
519 | 545 | let load_alter_connection ent = function |
520 | 546 | | Fs("connection", [ |
521 | - F("argument",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"},"argument")); | |
547 | + F("argument",SameAs({hash=true; numbers=[(*ent_id;*)frm_id;arg_id]; suffix="arg"},"argument")); | |
522 | 548 | Fset("phrases",phrases)]) -> |
523 | - if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else | |
549 | + (* if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else *) | |
524 | 550 | let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase -> |
525 | 551 | let sch_id,psn_id,phr_id = load_phrases_set ent phrase in |
526 | 552 | IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l), |
... | ... | @@ -528,7 +554,7 @@ let load_alter_connection ent = function |
528 | 554 | if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else |
529 | 555 | IntSet.min_elt sch_set, frm_id, |
530 | 556 | {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)} |
531 | - | xml -> failwith ("load_alter_connections: \n " ^ tei_to_string xml) | |
557 | + | xml -> failwith ("load_alter_connection: \n " ^ tei_to_string xml) | |
532 | 558 | |
533 | 559 | let load_alternations ent = function |
534 | 560 | | Fs("alternation",[Fset("connections",connections_set)]) -> |
... | ... | @@ -561,6 +587,17 @@ let load_entry phrases = function |
561 | 587 | | xml -> failwith ("load_entry: \n" ^ tei_to_string xml))) |
562 | 588 | | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml) |
563 | 589 | |
590 | +let add_known_frames known_frames e = | |
591 | + Xlist.fold e.frames known_frames (fun known_frames f -> | |
592 | + if f.frm_id < 0 then known_frames else IntMap.add known_frames f.frm_id f) | |
593 | + | |
594 | +let expand_frames known_frames e = | |
595 | + {e with frames = | |
596 | + List.rev (Xlist.rev_map e.frames (fun f -> | |
597 | + if f.frm_id < 0 then | |
598 | + try IntMap.find known_frames (-f.frm_id) with Not_found -> failwith "expand_frames" | |
599 | + else f))} | |
600 | + | |
564 | 601 | let load_walenty filename = |
565 | 602 | begin |
566 | 603 | match Xml.parse_file filename with |
... | ... | @@ -568,7 +605,9 @@ let load_walenty filename = |
568 | 605 | [Xml.Element("teiHeader",_,_) ; |
569 | 606 | Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> |
570 | 607 | let phrases = ref IntMap.empty in |
571 | - let walenty = List.rev (Xlist.rev_map entries (load_entry phrases)) in | |
608 | + let walenty = Xlist.rev_map entries (load_entry phrases) in | |
609 | + let known_frames = Xlist.fold walenty IntMap.empty add_known_frames in | |
610 | + let walenty = Xlist.rev_map walenty (expand_frames known_frames) in | |
572 | 611 | walenty, !phrases |
573 | 612 | | _ -> failwith "load_walenty" |
574 | 613 | end |
... | ... |
walenty/ENIAMwalTypes.ml
... | ... | @@ -33,7 +33,7 @@ type grad = Grad of string | GradUndef |
33 | 33 | type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef |
34 | 34 | (* type acm = Acm of string | AcmUndef *) |
35 | 35 | |
36 | -type gf = SUBJ | OBJ | ARG | |
36 | +type gf = SUBJ | OBJ | ARG | HEAD (* FIXME *) | |
37 | 37 | |
38 | 38 | type pos = |
39 | 39 | SUBST of number * case |
... | ... | @@ -219,3 +219,14 @@ type entry = {ent_id: int; |
219 | 219 | |
220 | 220 | let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[]; |
221 | 221 | frames=[]; meanings=[]; alternations=[]} |
222 | + | |
223 | +type connected = {sch_id: int; | |
224 | + frm_id: int; | |
225 | + sopinion: opinion; | |
226 | + fopinion: opinion; | |
227 | + meanings: meaning list; | |
228 | + negativity: negation; | |
229 | + predicativity: pred; | |
230 | + aspect: aspect; | |
231 | + schema: position list; | |
232 | + examples: (opinion * string) list} | |
... | ... |
walenty/README
... | ... | @@ -32,8 +32,8 @@ make clean |
32 | 32 | |
33 | 33 | Credits |
34 | 34 | ------- |
35 | -Copyright © 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
36 | -Copyright © 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
35 | +Copyright © 2016-2018 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
36 | +Copyright © 2016-2018 Institute of Computer Science Polish Academy of Sciences | |
37 | 37 | |
38 | 38 | Licence |
39 | 39 | ------- |
... | ... |
walenty/resources/README
... | ... | @@ -3,7 +3,7 @@ Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty) |
3 | 3 | |
4 | 4 | Walenty is licensed under the following license: |
5 | 5 | |
6 | -(C) Copyright 2012–2017 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN) | |
6 | +(C) Copyright 2012–2018 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN) | |
7 | 7 | This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/ |
8 | 8 | Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains 90326 schemata and 17920 frames for 16044 lemmata. |
9 | 9 | The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk. |
... | ... |