Commit accee0d9e03c9c3afe9dd18736116239850007d9
1 parent: 282d8aec
sensy i string_of w lexSemantics
Showing 4 changed files with 27 additions and 10 deletions
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -21,6 +21,21 @@ open ENIAMtokenizerTypes |
21 | 21 | open ENIAMlexSemanticsTypes |
22 | 22 | open Xstd |
23 | 23 | |
24 | +let string_of_lex_sems tokens lex_sems = | |
25 | + String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size lex_sems - 1) [] (fun l id -> | |
26 | + let t = ExtArray.get lex_sems id in | |
27 | + let t2 = ExtArray.get tokens id in | |
28 | + let orth = t2.ENIAMtokenizerTypes.orth in | |
29 | + let lemma = ENIAMtokens.string_of_token t2.ENIAMtokenizerTypes.token in | |
30 | + let lroles = if snd t.lroles = "" then fst t.lroles else fst t.lroles ^ " " ^ snd t.lroles in | |
31 | + let core = Printf.sprintf "%3d %s %s %s" id orth lemma lroles in | |
32 | + let senses = Xlist.map t.senses (fun (sense,hipero,weight) -> | |
33 | + Printf.sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight) in | |
34 | + let valence = Xlist.map t.valence (ENIAMwalStringOf.fnum_frame "") in | |
35 | + let simple_valence = Xlist.map t.simple_valence (ENIAMwalStringOf.fnum_frame "") in | |
36 | + (* let semantics = *) | |
37 | + (String.concat "\n " ([core] @ senses @ valence @ simple_valence)) :: l))) | |
38 | + | |
24 | 39 | let load_proper_name proper = function |
25 | 40 | [lemma; types] -> |
26 | 41 | let types = Str.split (Str.regexp "|") types in |
... | ... | @@ -29,7 +44,7 @@ let load_proper_name proper = function |
29 | 44 | |
30 | 45 | let proper_names = |
31 | 46 | let proper = File.fold_tab proper_names_filename StringMap.empty load_proper_name in |
32 | - let proper = File.fold_tab proper_names_filename2 StringMap.empty load_proper_name in | |
47 | + let proper = File.fold_tab proper_names_filename2 proper load_proper_name in | |
33 | 48 | proper |
34 | 49 | |
35 | 50 | let remove l s = |
... | ... | @@ -64,7 +79,8 @@ let assign tokens text = |
64 | 79 | let senses = find_senses token in |
65 | 80 | let lex_sem = {empty_lex_sem with senses=senses} in |
66 | 81 | let j = ExtArray.add lex_sems lex_sem in |
67 | - if j <> i then failwith "assign_semantic_valence") | |
82 | + if j <> i then failwith "assign_semantic_valence"); | |
83 | + lex_sems | |
68 | 84 | |
69 | 85 | (* |
70 | 86 | (* print_endline "a14"; *) |
... | ... |
lexSemantics/ENIAMlexSemanticsData.ml
... | ... | @@ -17,9 +17,10 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open PreTypes | |
20 | +open ENIAMtokenizerTypes | |
21 | +open ENIAMlexSemanticsTypes | |
21 | 22 | open Xstd |
22 | - | |
23 | +(* | |
23 | 24 | let subst_inst_roles = Xlist.fold [ |
24 | 25 | "wiosna", "Time",""; |
25 | 26 | "lato", "Time",""; |
... | ... | @@ -362,7 +363,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
362 | 363 | "temu","acc", "Time","",["CZAS"],["CZAS"]; (* dodane *) |
363 | 364 | "za","gen", "Time","",["CZAS"],["CZAS"]; (* dodane *) |
364 | 365 | ] StringMap.empty (fun map (lemma,case,role,role_attr,hipero,sel_prefs) -> |
365 | - let hipero = Xlist.fold hipero StringSet.empty PreWordnet.get_hipero_rec in | |
366 | + let hipero = Xlist.fold hipero StringSet.empty ENIAMplWordnet.get_hipero_rec in | |
366 | 367 | let map2 = try StringMap.find map lemma with Not_found -> StringMap.empty in |
367 | 368 | let map2 = StringMap.add_inc map2 case [role,role_attr,hipero,sel_prefs] (fun l -> (role,role_attr,hipero,sel_prefs) :: l) in |
368 | 369 | StringMap.add map lemma map2) |
... | ... | @@ -548,3 +549,4 @@ let assign_semantics paths = |
548 | 549 | | Lemma(lemma,"prep",l) -> Xlist.fold l [] (fun l -> function cases :: _ -> assign_prep_semantics lemma cases t @ l | [] -> l) |
549 | 550 | | _ -> [t] |
550 | 551 | ))) |
552 | +*) | |
... | ... |
lexSemantics/makefile
... | ... | @@ -6,15 +6,15 @@ OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa #eniam-lexSemantics.cmxa |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | -SOURCES= ENIAMlexSemanticsTypes.ml ENIAMlexSemantics.ml | |
9 | +SOURCES= ENIAMlexSemanticsTypes.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml | |
10 | 10 | |
11 | 11 | all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa |
12 | 12 | |
13 | 13 | install: all |
14 | 14 | mkdir -p $(INSTALLDIR) |
15 | 15 | cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR) |
16 | - cp ENIAMlexSemanticsTypes.cmi ENIAMlexSemantics.cmi $(INSTALLDIR) | |
17 | - cp ENIAMlexSemanticsTypes.cmx ENIAMlexSemantics.cmx $(INSTALLDIR) | |
16 | + cp ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsData.cmi ENIAMlexSemantics.cmi $(INSTALLDIR) | |
17 | + cp ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsData.cmx ENIAMlexSemantics.cmx $(INSTALLDIR) | |
18 | 18 | mkdir -p /usr/share/eniam/lexSemantics |
19 | 19 | cp resources/* /usr/share/eniam/lexSemantics |
20 | 20 | ln -s /usr/share/eniam/lexSemantics/proper_names_20160104.tab /usr/share/eniam/lexSemantics/proper_names.tab |
... | ... |
lexSemantics/test.ml
... | ... | @@ -59,8 +59,7 @@ let _ = |
59 | 59 | print_endline ("\nTEST: " ^ s); |
60 | 60 | let text,tokens = ENIAMsubsyntax.parse_text s in |
61 | 61 | let lex_sems = ENIAMlexSemantics.assign tokens text in |
62 | - (* print_endline (ENIAMtokenizer.xml_of tokens); *) | |
63 | - Xlist.iter tokens (fun token -> print_endline (ENIAMtokenizer.string_of 0 token))); | |
62 | + print_endline (ENIAMlexSemantics.string_of_lex_sems tokens lex_sems)); | |
64 | 63 | (* print_endline "Testy użytkownika."; |
65 | 64 | print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; |
66 | 65 | let s = ref (read_line ()) in |
... | ... |