Commit 3d8c3471fe1997aeaa1ce7b19d80d05be880a988
1 parent
2cf638de
Dodanie arole
Showing
5 changed files
with
49 additions
and
33 deletions
lexSemantics/ENIAMadjuncts.ml
@@ -268,19 +268,21 @@ let add_adjuncts preps compreps compars pos2 (selectors,schema) = | @@ -268,19 +268,21 @@ let add_adjuncts preps compreps compars pos2 (selectors,schema) = | ||
268 | | "adv" -> [selectors,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] | 268 | | "adv" -> [selectors,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] |
269 | | _ -> [] | 269 | | _ -> [] |
270 | 270 | ||
271 | -let add_connected_adjuncts preps compreps compars pos2 (selectors,meanings,schema) = | 271 | +open ENIAMlexSemanticsTypes |
272 | + | ||
273 | +let add_connected_adjuncts preps compreps compars pos2 frame = | ||
272 | let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_connected_comprep in | 274 | let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_connected_comprep in |
273 | let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepnp prep cases) in | 275 | let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepnp prep cases) in |
274 | let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepadjp prep cases) in | 276 | let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepadjp prep cases) in |
275 | let compars = Xlist.rev_map compars ENIAMwalRenderer.render_connected_compar in | 277 | let compars = Xlist.rev_map compars ENIAMwalRenderer.render_connected_compar in |
276 | match pos2 with | 278 | match pos2 with |
277 | - "verb" -> [selectors,meanings,schema @ ENIAMwalRenderer.verb_connected_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] | 279 | + "verb" -> [{frame with positions=frame.positions @ ENIAMwalRenderer.verb_connected_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars}] |
278 | | "noun" -> [ | 280 | | "noun" -> [ |
279 | - [Nsyn,Eq,["proper"]] @ selectors,meanings,ENIAMwalRenderer.proper_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars; | ||
280 | - [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,meanings,ENIAMwalRenderer.measure_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars; | ||
281 | - [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,meanings,ENIAMwalRenderer.common_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars] | ||
282 | - | "adj" -> [selectors,meanings,schema @ ENIAMwalRenderer.adj_connected_adjuncts_simp @ compars] | ||
283 | - | "adv" -> [selectors,meanings,schema @ ENIAMwalRenderer.adv_connected_adjuncts_simp @ compars] | 281 | + {frame with selectors=[Nsyn,Eq,["proper"]] @ frame.selectors; positions=ENIAMwalRenderer.proper_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars}; |
282 | + {frame with selectors=[Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ frame.selectors; positions=ENIAMwalRenderer.measure_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars}; | ||
283 | + {frame with selectors=[Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ frame.selectors; positions=frame.positions @ ENIAMwalRenderer.common_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars}] | ||
284 | + | "adj" -> [{frame with positions=frame.positions @ ENIAMwalRenderer.adj_connected_adjuncts_simp @ compars}] | ||
285 | + | "adv" -> [{frame with positions=frame.positions @ ENIAMwalRenderer.adv_connected_adjuncts_simp @ compars}] | ||
284 | | _ -> [] | 286 | | _ -> [] |
285 | 287 | ||
286 | (* let _ = | 288 | (* let _ = |
lexSemantics/ENIAMlexSemantics.ml
@@ -32,8 +32,8 @@ let find_meaning m = | @@ -32,8 +32,8 @@ let find_meaning m = | ||
32 | let lex_sie = LCG (ENIAMwalRenderer.render_morf (SimpleLexArg("się",QUB))) | 32 | let lex_sie = LCG (ENIAMwalRenderer.render_morf (SimpleLexArg("się",QUB))) |
33 | 33 | ||
34 | let find_senses t s = (* FIXME: sensy zawierające 'się' *) | 34 | let find_senses t s = (* FIXME: sensy zawierające 'się' *) |
35 | - let set = Xlist.fold s.frames StringSet.empty (fun set (selectors,meanings,positions) -> | ||
36 | - Xlist.fold meanings set (fun set (name,hipero,weight) -> | 35 | + let set = Xlist.fold s.frames StringSet.empty (fun set frame -> |
36 | + Xlist.fold frame.meanings set (fun set (name,hipero,weight) -> | ||
37 | StringSet.add set name)) in | 37 | StringSet.add set name)) in |
38 | let senses = match t.token with | 38 | let senses = match t.token with |
39 | Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos | 39 | Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos |
@@ -47,9 +47,9 @@ let find_senses t s = (* FIXME: sensy zawierające 'się' *) | @@ -47,9 +47,9 @@ let find_senses t s = (* FIXME: sensy zawierające 'się' *) | ||
47 | | _ -> [] in | 47 | | _ -> [] in |
48 | let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) -> | 48 | let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) -> |
49 | if StringSet.mem set name then senses_sie else (name,hipero,weight) :: senses_sie) in | 49 | if StringSet.mem set name then senses_sie else (name,hipero,weight) :: senses_sie) in |
50 | - let frames = if senses = [] then s.frames else ([],senses,[]) :: s.frames in | ||
51 | - let frames = if senses_sie = [] then frames else ([],senses_sie, | ||
52 | - [{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]}]) :: frames in | 50 | + let frames = if senses = [] then s.frames else {empty_frame with meanings=senses} :: s.frames in |
51 | + let frames = if senses_sie = [] then frames else {empty_frame with meanings=senses_sie; | ||
52 | + positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]}]} :: frames in (* FIXME: czy to nie usuwa elementów z ramy? *) | ||
53 | {s with frames=frames} | 53 | {s with frames=frames} |
54 | 54 | ||
55 | let find_selprefs schema = (* FIXME: RelationRole *) | 55 | let find_selprefs schema = (* FIXME: RelationRole *) |
@@ -165,13 +165,12 @@ let assign_valence tokens lex_sems group = | @@ -165,13 +165,12 @@ let assign_valence tokens lex_sems group = | ||
165 | selectors,ENIAMwalRenderer.render_lex_entry entry) in | 165 | selectors,ENIAMwalRenderer.render_lex_entry entry) in |
166 | let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema) -> | 166 | let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema) -> |
167 | Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema) (fun (selectors,schema) -> | 167 | Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema) (fun (selectors,schema) -> |
168 | - selectors,meanings,schema))) in (* FIXME: gubię opinię *) | ||
169 | - let connected = Xlist.fold connected [] (fun connected (selectors,meanings,schema) -> | ||
170 | - if ENIAMadjuncts.check_selector_lex_constraints lexemes pos selectors then (selectors,meanings,schema) :: connected else connected) in | ||
171 | - let connected = Xlist.rev_map connected (fun (selectors,meanings,schema) -> | ||
172 | - selectors, | ||
173 | - Xlist.map meanings find_meaning, | ||
174 | - find_selprefs (ENIAMwalRenderer.render_connected_schema (ENIAMwalReduce.set_necessary schema))) in | 168 | + {empty_frame with selectors=selectors; meanings= Xlist.map meanings find_meaning; positions=schema}))) in (* FIXME: gubię opinię *) |
169 | + let connected = Xlist.fold connected [] (fun connected frame -> | ||
170 | + if ENIAMadjuncts.check_selector_lex_constraints lexemes pos frame.selectors then frame :: connected else connected) in | ||
171 | + let connected = Xlist.rev_map connected (fun frame -> | ||
172 | + {frame with | ||
173 | + positions = find_selprefs (ENIAMwalRenderer.render_connected_schema (ENIAMwalReduce.set_necessary frame.positions))}) in | ||
175 | let connected = List.flatten (Xlist.rev_map connected (ENIAMadjuncts.add_connected_adjuncts preps compreps compars pos2)) in | 174 | let connected = List.flatten (Xlist.rev_map connected (ENIAMadjuncts.add_connected_adjuncts preps compreps compars pos2)) in |
176 | ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with | 175 | ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with |
177 | schemata=schemata; lex_entries=entries; frames=connected}) | 176 | schemata=schemata; lex_entries=entries; frames=connected}) |
@@ -218,8 +217,8 @@ let assign_valence tokens lex_sems group = | @@ -218,8 +217,8 @@ let assign_valence tokens lex_sems group = | ||
218 | 217 | ||
219 | let disambiguate_senses lex_sems group = | 218 | let disambiguate_senses lex_sems group = |
220 | let prefs = Xlist.fold group (StringSet.singleton "ALL") (fun prefs id -> | 219 | let prefs = Xlist.fold group (StringSet.singleton "ALL") (fun prefs id -> |
221 | - Xlist.fold (ExtArray.get lex_sems id).frames prefs (fun prefs (_,_,schema) -> | ||
222 | - Xlist.fold schema prefs (fun prefs t -> | 220 | + Xlist.fold (ExtArray.get lex_sems id).frames prefs (fun prefs frame -> |
221 | + Xlist.fold frame.positions prefs (fun prefs t -> | ||
223 | Xlist.fold t.sel_prefs prefs (fun prefs -> function | 222 | Xlist.fold t.sel_prefs prefs (fun prefs -> function |
224 | SynsetName s -> StringSet.add prefs s | 223 | SynsetName s -> StringSet.add prefs s |
225 | | _ -> failwith "disambiguate_senses")))) in | 224 | | _ -> failwith "disambiguate_senses")))) in |
@@ -231,12 +230,12 @@ let disambiguate_senses lex_sems group = | @@ -231,12 +230,12 @@ let disambiguate_senses lex_sems group = | ||
231 | let senses = if is_zero then StringSet.add senses "0" else senses in*) | 230 | let senses = if is_zero then StringSet.add senses "0" else senses in*) |
232 | Xlist.iter group (fun id -> | 231 | Xlist.iter group (fun id -> |
233 | let t = ExtArray.get lex_sems id in | 232 | let t = ExtArray.get lex_sems id in |
234 | - ExtArray.set lex_sems id {t with frames=Xlist.map t.frames (fun (selectors,meanings,positions) -> | ||
235 | - let meanings = Xlist.map meanings (fun (name,hipero,weight) -> | 233 | + ExtArray.set lex_sems id {t with frames=Xlist.map t.frames (fun frame -> |
234 | + let meanings = Xlist.map frame.meanings (fun (name,hipero,weight) -> | ||
236 | let hipero = Xlist.fold hipero ["ALL",0] (fun hipero (name,cost) -> | 235 | let hipero = Xlist.fold hipero ["ALL",0] (fun hipero (name,cost) -> |
237 | if StringSet.mem prefs name then (name,cost) :: hipero else hipero) in | 236 | if StringSet.mem prefs name then (name,cost) :: hipero else hipero) in |
238 | name,hipero,weight) in | 237 | name,hipero,weight) in |
239 | - selectors,meanings,positions)}) | 238 | + {frame with meanings=meanings})}) |
240 | 239 | ||
241 | let remove_unused_tokens tokens groups = | 240 | let remove_unused_tokens tokens groups = |
242 | let set = Xlist.fold groups IntSet.empty (fun set group -> | 241 | let set = Xlist.fold groups IntSet.empty (fun set group -> |
lexSemantics/ENIAMlexSemanticsStringOf.ml
@@ -27,6 +27,11 @@ let lex_sems t = | @@ -27,6 +27,11 @@ let lex_sems t = | ||
27 | let t2 = ExtArray.get t id in | 27 | let t2 = ExtArray.get t id in |
28 | (Printf.sprintf "%3d %s" id (lex_sem t2)) :: l)))*) | 28 | (Printf.sprintf "%3d %s" id (lex_sem t2)) :: l)))*) |
29 | 29 | ||
30 | +let arole f = | ||
31 | + (if f.arole = "" then "" else "," ^ f.arole) ^ | ||
32 | + (if f.arole_attr = "" then "" else "," ^ f.arole_attr) ^ | ||
33 | + (if f.arev then ",rev" else "") | ||
34 | + | ||
30 | let string_of_lex_sems tokens lex_sems = | 35 | let string_of_lex_sems tokens lex_sems = |
31 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size lex_sems - 1) [] (fun l id -> | 36 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size lex_sems - 1) [] (fun l id -> |
32 | let t = ExtArray.get lex_sems id in | 37 | let t = ExtArray.get lex_sems id in |
@@ -44,9 +49,9 @@ let string_of_lex_sems tokens lex_sems = | @@ -44,9 +49,9 @@ let string_of_lex_sems tokens lex_sems = | ||
44 | String.concat "," (Xlist.map meanings (fun m -> ENIAMwalStringOf.meaning m))) in | 49 | String.concat "," (Xlist.map meanings (fun m -> ENIAMwalStringOf.meaning m))) in |
45 | let senses = Xlist.map t.senses (fun (sense,hipero,weight) -> | 50 | let senses = Xlist.map t.senses (fun (sense,hipero,weight) -> |
46 | Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight) in*) | 51 | Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight) in*) |
47 | - let frames = Xlist.map t.frames (fun (selectors,meanings,schema) -> | ||
48 | - "*[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "} " ^ | ||
49 | - String.concat "," (Xlist.map meanings (fun (sense,hipero,weight) -> | 52 | + let frames = Xlist.map t.frames (fun f -> |
53 | + "*" ^ arole f ^ "[" ^ ENIAMcategoriesPL.string_of_selectors f.selectors ^ "] {" ^ ENIAMwalStringOf.schema f.positions ^ "} " ^ | ||
54 | + String.concat "," (Xlist.map f.meanings (fun (sense,hipero,weight) -> | ||
50 | Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight))) in | 55 | Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight))) in |
51 | (String.concat "\n " ([core] @ (*senses @*) schemata @ frames @ lex_entries)) :: l))) | 56 | (String.concat "\n " ([core] @ (*senses @*) schemata @ frames @ lex_entries)) :: l))) |
52 | (* let lroles = if snd t.lroles = "" then fst t.lroles else fst t.lroles ^ " " ^ snd t.lroles in | 57 | (* let lroles = if snd t.lroles = "" then fst t.lroles else fst t.lroles ^ " " ^ snd t.lroles in |
lexSemantics/ENIAMlexSemanticsTypes.ml
@@ -36,20 +36,30 @@ type semantics = | @@ -36,20 +36,30 @@ type semantics = | ||
36 | | SpecialMod of string * (type_arg list * type_term)*) | 36 | | SpecialMod of string * (type_arg list * type_term)*) |
37 | | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *) | 37 | | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *) |
38 | 38 | ||
39 | +type frame = { | ||
40 | + selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list; | ||
41 | + meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list; | ||
42 | + positions: ENIAMwalTypes.position list; | ||
43 | + arole: string; | ||
44 | + arole_attr: string; | ||
45 | + arev: bool; | ||
46 | + } | ||
47 | + | ||
48 | +let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false} | ||
49 | + | ||
39 | type lex_sem = { | 50 | type lex_sem = { |
40 | schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * | 51 | schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
41 | (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; | 52 | (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; |
42 | lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * | 53 | lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
43 | ENIAM_LCGtypes.grammar_symbol) list; | 54 | ENIAM_LCGtypes.grammar_symbol) list; |
44 | - frames: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * | ||
45 | - ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list * ENIAMwalTypes.position list) list; | 55 | + frames: frame list; |
46 | cats: string list; | 56 | cats: string list; |
47 | (* e: labels; *) | 57 | (* e: labels; *) |
48 | (* valence: (int * ENIAMwalTypes.frame) list; | 58 | (* valence: (int * ENIAMwalTypes.frame) list; |
49 | simple_valence: (int * ENIAMwalTypes.frame) list; | 59 | simple_valence: (int * ENIAMwalTypes.frame) list; |
50 | very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *) | 60 | very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *) |
51 | (* senses: (string * (string * int) list * float) list; *) | 61 | (* senses: (string * (string * int) list * float) list; *) |
52 | - lroles: string * string; | 62 | + (* lroles: string * string; *) |
53 | semantics: semantics; | 63 | semantics: semantics; |
54 | } | 64 | } |
55 | 65 | ||
@@ -64,7 +74,7 @@ type lex_sem = { | @@ -64,7 +74,7 @@ type lex_sem = { | ||
64 | let empty_lex_sem = { | 74 | let empty_lex_sem = { |
65 | schemata=[]; lex_entries=[]; frames=[]; cats=["X"]; | 75 | schemata=[]; lex_entries=[]; frames=[]; cats=["X"]; |
66 | (*e=empty_labels;*) (*valence=[]; simple_valence=[]; very_simple_valence=[];*) (*senses=[];*) | 76 | (*e=empty_labels;*) (*valence=[]; simple_valence=[]; very_simple_valence=[];*) (*senses=[];*) |
67 | - lroles="",""; semantics=Normal} | 77 | + (*lroles="","";*) semantics=Normal} |
68 | 78 | ||
69 | (* FIXME: poprawić katalog *) | 79 | (* FIXME: poprawić katalog *) |
70 | (*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat" | 80 | (*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat" |
lexSemantics/makefile
@@ -6,8 +6,8 @@ OCAMLFLAGS=$(INCLUDES) -g | @@ -6,8 +6,8 @@ OCAMLFLAGS=$(INCLUDES) -g | ||
6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa |
7 | INSTALLDIR=`ocamlc -where`/eniam | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | ||
9 | -SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMadjuncts.ml \ | ||
10 | - ENIAMlexSemanticsTypes.ml ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMplWordnet.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml | 9 | +SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMlexSemanticsTypes.ml ENIAMadjuncts.ml \ |
10 | + ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMplWordnet.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml | ||
11 | 11 | ||
12 | all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa | 12 | all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa |
13 | 13 |